1 | import libxml2mod
|
---|
2 | import types
|
---|
3 | import sys
|
---|
4 |
|
---|
5 | # The root of all libxml2 errors.
|
---|
class libxmlError(Exception):
    """Root of all libxml2 errors."""
|
---|
7 |
|
---|
8 | # Type of the wrapper class for the C objects wrappers
|
---|
def checkWrapper(obj):
    """Tell whether obj is something other than a raw C-object wrapper.

    Returns 1 when the type name of obj is neither 'PyCObject' nor
    'PyCapsule', and 0 when it is one of those two.
    """
    # The previous body inspected an undefined name (_obj); the resulting
    # NameError was swallowed by a bare except, so the answer was always 0.
    name = type(obj).__name__
    if name != 'PyCObject' and name != 'PyCapsule':
        return 1
    return 0
|
---|
17 |
|
---|
18 | #
|
---|
19 | # id() is sometimes negative ...
|
---|
20 | #
|
---|
def pos_id(o):
    """Return id(o), mapped to sys.maxsize - id(o) when id(o) is negative."""
    ident = id(o)
    return (sys.maxsize - ident) if ident < 0 else ident
|
---|
26 |
|
---|
27 | #
|
---|
28 | # Errors raised by the wrappers when some tree handling failed.
|
---|
29 | #
|
---|
class treeError(libxmlError):
    """libxmlError carrying a message, used when tree handling fails."""

    def __init__(self, msg):
        # The message is the only state; __str__ hands it back as-is.
        self.msg = msg

    def __str__(self):
        text = self.msg
        return text
|
---|
35 |
|
---|
class parserError(libxmlError):
    """libxmlError carrying a message; raised by SAXCallback.error and
    SAXCallback.fatalError."""

    def __init__(self, msg):
        # The message is the only state; __str__ hands it back as-is.
        self.msg = msg

    def __str__(self):
        text = self.msg
        return text
|
---|
41 |
|
---|
class uriError(libxmlError):
    """libxmlError carrying a message (named for URI-related failures)."""

    def __init__(self, msg):
        # The message is the only state; __str__ hands it back as-is.
        self.msg = msg

    def __str__(self):
        text = self.msg
        return text
|
---|
47 |
|
---|
class xpathError(libxmlError):
    """libxmlError carrying a message (named for XPath-related failures)."""

    def __init__(self, msg):
        # The message is the only state; __str__ hands it back as-is.
        self.msg = msg

    def __str__(self):
        text = self.msg
        return text
|
---|
53 |
|
---|
class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    Every io_* method returns -1 when no underlying object is attached,
    i.e. when None was passed in, after io_close(), or after a read that
    raised.
    """

    def __init__(self, _obj):
        # Stored under the name-mangled attribute _ioWrapper__io.
        self.__io = _obj
        # Low-level buffer handle; filled in by the subclasses.
        self._o = None

    def io_close(self):
        """Close and detach the wrapped object.

        Returns 0 on success, -1 when nothing is attached.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 when nothing is attached.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len=-1):
        """Read from the wrapped object.

        A negative len reads everything (read() without argument),
        otherwise at most len units are requested.  Returns the data read,
        or -1 when nothing is attached or the read raised.
        """
        if self.__io is None:
            return -1
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception as e:
            print("failed to read from Python:", type(e))
            print("on IO:", self.__io)
            # Detach the failing object so later calls return -1 instead of
            # retrying it.  This used to be "self.__io == None", a
            # comparison whose result was thrown away.
            self.__io = None
            return -1
        return ret

    def io_write(self, str, len=-1):
        """Write str to the wrapped object and return what write() returns.

        Returns -1 when nothing is attached.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # NOTE(review): standard Python file objects take a single argument
        # to write(); this path presumably targets objects with a
        # write(data, length) method -- confirm against callers.
        return self.__io.write(str, len)
|
---|
96 |
|
---|
class ioReadWrapper(ioWrapper):
    """ioWrapper paired with a parser input buffer created by libxml2mod."""

    def __init__(self, _obj, enc=""):
        """Wrap _obj and create the input buffer that reads through self.

        enc is passed unchanged to libxml2mod.xmlCreateInputBuffer.
        """
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as close().  Called through the class so that a
        # subclass overriding close() does not change what finalisation
        # does.  The debug print that used to live here wrote "__del__" to
        # stdout for every collected instance.
        ioReadWrapper.close(self)

    def close(self):
        """Close the wrapped object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
|
---|
114 |
|
---|
class ioWriteWrapper(ioWrapper):
    """ioWrapper paired with a libxml2 output buffer handle."""

    def __init__(self, _obj, enc=""):
        """Wrap an output buffer handle.

        When _obj is a string only a message is printed and the instance
        is left with nothing attached.  Otherwise _obj is kept as the
        buffer handle, and the Python file libxml2mod reports for it (or
        _obj itself when there is none) becomes the wrapped object.
        enc is accepted but not used here.
        """
        if isinstance(_obj, str):
            print("write io from a string")
            # Initialise the base state so __del__/flush/close work.  The
            # old code assigned a misspelt attribute (self.o) and left the
            # wrapped-object slot undefined, so __del__ raised.
            ioWrapper.__init__(self, None)
        else:
            file = libxml2mod.outputBufferGetPythonFile(_obj)
            if file is not None:
                ioWrapper.__init__(self, file)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the wrapped object, then close the output buffer."""
        self.io_flush()
        # NOTE(review): this closes the buffer rather than flushing it;
        # kept as-is because callers may rely on it -- confirm intent.
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the wrapped object and close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
|
---|
158 |
|
---|
159 | #
|
---|
160 | # Example of a class to handle SAX events
|
---|
161 | #
|
---|
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which
    raise parserError.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content=None):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        # content defaults to None only so that four-positional-argument
        # calls, which the former duplicate definition accepted, still work.
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        # This used to be a second "entityDecl" definition, which silently
        # replaced the five-argument one above.
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
|
---|
260 |
|
---|
261 | #
|
---|
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and gracefully
# handles exceptions), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
|
---|
266 | #
|
---|
class xmlCore:
    """Common ancestor of the node wrapper classes.

    The low-level node handle lives in ``_o``.  On top of it the class
    offers tree navigation, name/type/content access, serialization,
    canonicalization, XPath evaluation and iteration, all delegated to
    libxml2mod.
    """

    def __init__(self, _obj=None):
        # _obj is the low-level handle to wrap, or None for an empty wrapper.
        self._o = _obj

    def __eq__(self, other):
        """Compare the underlying nodes; None and non-wrappers are unequal."""
        if other is None:
            return False
        try:
            other_o = other._o
        except AttributeError:
            # Not a wrapper: let Python fall back to its default comparison
            # instead of failing with AttributeError.
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        """Negation of the node comparison; None is always different."""
        if other is None:
            return True
        try:
            other_o = other._o
        except AttributeError:
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        return not ret

    def __hash__(self):
        ret = libxml2mod.nodeHash(self._o)
        return ret

    def __str__(self):
        return self.serialize()

    def get_parent(self):
        """Return the wrapped parent node, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_children(self):
        """Return the wrapped first child, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_last(self):
        """Return the wrapped result of libxml2mod.last, or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_next(self):
        """Return the wrapped next sibling, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_properties(self):
        """Return the first attribute as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        """Return the wrapped previous sibling, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)

    # Alias of get_content under a second name.
    getContent = get_content

    def get_name(self):
        return libxml2mod.name(self._o)

    def get_type(self):
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the owning document as an xmlDoc.

        When libxml2mod reports no document and this node is itself a
        document ("document_xml" or "document_html"), the node is wrapped
        as the document; otherwise None is returned.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ("document_xml", "document_html"):
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)

    #
    # Attributes common to nearly all types of nodes, exposed as
    # properties.  (The __getattr__ emulation for interpreters older than
    # Python 2.2 has been dropped; it could no longer be reached.)
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properies")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding=None, format=0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding=None, format=0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # libxml2mod wants the raw handles, not the wrappers.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as context node.

        Returns None when the node has no document.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when evaluation raises.
            ctxt.xpathFreeContext()

    # Formerly a variant caching one context per document; removed because
    # of memory leaks (c.f. #126735).  Kept as an alias for compatibility.
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Free the underlying document through libxml2mod.xmlFreeDoc."""
        try:
            # Best effort: release an XPath context stored on the document
            # as _ctxt, if there is one.
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        libxml2mod.xmlFreeDoc(self._o)
|
---|
522 |
|
---|
523 |
|
---|
524 | #
|
---|
525 | # implements the depth-first iterator for libxml2 DOM tree
|
---|
526 | #
|
---|
class xmlCoreDepthFirstItertor:
    """Iterator yielding a node, then its children, then what follows it.

    Nodes only need ``children`` and ``next`` attributes.
    """

    def __init__(self, node):
        self.node = node
        # Nodes already yielded whose ``next`` link is still to be followed.
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        # Nothing pending: back up to the most recent yielded node and
        # continue with its ``next`` link until something turns up.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().next
        current = self.node
        self.parents.append(current)
        self.node = current.children
        return current

    next = __next__
|
---|
546 |
|
---|
547 | #
|
---|
548 | # implements the breadth-first iterator for libxml2 DOM tree
|
---|
549 | #
|
---|
class xmlCoreBreadthFirstItertor:
    """Iterator yielding a node, then what follows it, then children.

    Nodes only need ``children`` and ``next`` attributes.
    """

    def __init__(self, node):
        self.node = node
        # Nodes already yielded whose ``children`` link is still to be
        # followed.
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        # Nothing pending: back up to the most recent yielded node and
        # continue with its ``children`` link until something turns up.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().children
        current = self.node
        self.parents.append(current)
        self.node = current.next
        return current

    next = __next__
|
---|
569 |
|
---|
570 | #
|
---|
571 | # converters to present a nicer view of the XPath returns
|
---|
572 | #
|
---|
def nodeWrap(o):
    """Wrap a low-level node in the class matching its reported type.

    Unrecognised types are wrapped as xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        wrapper = xmlNode
    elif kind == "attribute":
        wrapper = xmlAttr
    elif kind[0:8] == "document":
        wrapper = xmlDoc
    elif kind == "namespace":
        wrapper = xmlNs
    elif kind == "elem_decl":
        wrapper = xmlElement
    elif kind == "attribute_decl":
        wrapper = xmlAttribute
    elif kind == "entity_decl":
        wrapper = xmlEntity
    elif kind == "dtd":
        wrapper = xmlDtd
    else:
        wrapper = xmlNode
    return wrapper(_obj=o)
|
---|
593 |
|
---|
def xpathObjectRet(o):
    """Convert an XPath result into Python values and node wrappers.

    Lists and tuples are converted element by element (keeping their
    container type), strings, ints and floats are returned unchanged, and
    anything else is passed to nodeWrap.
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple([xpathObjectRet(item) for item in o])
    if kind in (str, int, float):
        return o
    return nodeWrap(o)
|
---|
606 |
|
---|
607 | #
|
---|
608 | # register an XPath function
|
---|
609 | #
|
---|
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register f as the XPath function name (namespace ns_uri) on ctxt.

    Returns whatever libxml2mod.xmlRegisterXPathFunction returns; the
    result used to be stored in an unused local and dropped.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
|
---|
612 |
|
---|
613 | #
|
---|
614 | # For the xmlTextReader parser configuration
|
---|
615 | #
|
---|
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4
|
---|
620 |
|
---|
621 | #
|
---|
622 | # For the error callback severities
|
---|
623 | #
|
---|
PARSER_SEVERITY_VALIDITY_WARNING = 1
PARSER_SEVERITY_VALIDITY_ERROR = 2
PARSER_SEVERITY_WARNING = 3
PARSER_SEVERITY_ERROR = 4
|
---|
628 |
|
---|
629 | #
|
---|
630 | # register the libxml2 error handler
|
---|
631 | #
|
---|
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error). """
    import sys
    if 'libxslt' in sys.modules:
        # Once libxslt has been imported, registration has to go through
        # its error handler instead.
        import libxslt
        return libxslt.registerErrorHandler(f, ctx)
    # Normal case: libxslt is not loaded.
    return libxml2mod.xmlRegisterErrorHandler(f, ctx)
|
---|
645 |
|
---|
class parserCtxtCore:
    """Hand-written base for the parser context wrapper.

    Holds the low-level context handle in ``_o`` and frees it through
    libxml2mod.xmlFreeParserCtxt when the wrapper is finalised.
    """

    def __init__(self, _obj=None):
        # _obj is the low-level handle to own, or None.
        self._o = _obj

    def __del__(self):
        # getattr: tolerate finalisation of an instance whose __init__
        # never ran (or failed early), where _o does not exist yet.
        handle = getattr(self, "_o", None)
        if handle is not None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
|
---|
674 |
|
---|
675 |
|
---|
class ValidCtxtCore:
    """Mixin adding error-handler registration for DTD validation.

    Reads ``self._o``, which this class does not set itself.
    """

    def __init__(self, *args, **kw):
        # Accepts and ignores any arguments.
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSetValidErrors(handle, err_func, warn_func, arg)
|
---|
687 |
|
---|
688 |
|
---|
class SchemaValidCtxtCore:
    """Mixin adding error-handler registration for Schema validation.

    Reads ``self._o``, which this class does not set itself.
    """

    def __init__(self, *args, **kw):
        # Accepts and ignores any arguments.
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlSchemaSetValidErrors(handle, err_func, warn_func, arg)
|
---|
700 |
|
---|
701 |
|
---|
class relaxNgValidCtxtCore:
    """Mixin adding error-handler registration for RelaxNG validation.

    Reads ``self._o``, which this class does not set itself.
    """

    def __init__(self, *args, **kw):
        # Accepts and ignores any arguments.
        pass

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        handle = self._o
        libxml2mod.xmlRelaxNGSetValidErrors(handle, err_func, warn_func, arg)
|
---|
713 |
|
---|
714 |
|
---|
def _xmlTextReaderErrorFunc(xxx_todo_changeme, msg, severity, locator):
    """Intermediate callback to wrap the locator"""
    # The first argument is the (f, arg) pair stored by SetErrorHandler.
    handler, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped_locator)
|
---|
719 |
|
---|
class xmlTextReaderCore:
    """Hand-written base for the text reader wrapper.

    Holds the low-level reader handle in ``_o`` and frees it through
    libxml2mod.xmlFreeTextReader when the wrapper is finalised.
    """

    def __init__(self, _obj=None):
        self.input = None
        # _obj is the low-level handle to own, or None.
        self._o = _obj

    def __del__(self):
        # getattr: tolerate finalisation of an instance whose __init__
        # never ran (or failed early), where _o does not exist yet.
        handle = getattr(self, "_o", None)
        if handle is not None:
            libxml2mod.xmlFreeTextReader(handle)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator).  Passing f=None removes it."""
        if f is None:
            libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)
        else:
            # Register the intermediate callback and stash the user's
            # (f, arg) pair as its argument.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f, arg))

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        f, arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if f is None:
            return None, None
        # f is the intermediate callback; arg is the (f, arg) pair that
        # SetErrorHandler stored.
        return arg
|
---|
751 |
|
---|
752 | #
|
---|
# The cleanup now goes through a wrapper in libxml.c
|
---|
754 | #
|
---|
def cleanupParser():
    """Run the parser cleanup through libxml2mod.xmlPythonCleanupParser."""
    libxml2mod.xmlPythonCleanupParser()
|
---|
757 |
|
---|
758 | #
|
---|
759 | # The interface to xmlRegisterInputCallbacks.
|
---|
760 | # Since this API does not allow to pass a data object along with
|
---|
761 | # match/open callbacks, it is necessary to maintain a list of all
|
---|
762 | # Python callbacks.
|
---|
763 | #
|
---|
# Python-level input callbacks, oldest first.
__input_callbacks = []


def registerInputCallback(func):
    """Add func to the input callbacks consulted when a URI is opened.

    func is called with the URI; returning None means "not handled".
    """
    def findOpenCallback(URI):
        # Most recently registered callback gets the first chance.
        for handler in reversed(__input_callbacks):
            opened = handler(URI)
            if opened is None:
                continue
            return opened
        return None

    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
    __input_callbacks.append(func)
|
---|
773 |
|
---|
def popInputCallbacks():
    """Drop the most recent Python-level input callback.

    Whenever no Python-level callback is left afterwards,
    libxml2mod.xmlUnregisterInputCallback is called.
    """
    # First pop python-level callbacks, when no more available - start
    # popping built-in ones.
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
|
---|
781 |
|
---|
782 | # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
---|
783 | #
|
---|
784 | # Everything before this line comes from libxml.py
|
---|
785 | # Everything after this line is automatically generated
|
---|
786 | #
|
---|
787 | # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
---|
788 |
|
---|