# read-only tree implementation
@cython.internal
cdef class _ReadOnlyProxy:
u"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
cdef bint _free_after_use
cdef xmlNode* _c_node
cdef _ReadOnlyProxy _source_proxy
cdef list _dependent_proxies
def __cinit__(self):
self._c_node = NULL
self._free_after_use = 0
cdef int _assertNode(self) except -1:
u"""This is our way of saying: this proxy is invalid!
"""
if not self._c_node:
raise ReferenceError("Proxy invalidated!")
return 0
cdef int _raise_unsupported_type(self) except -1:
raise TypeError(f"Unsupported node type: {self._c_node.type}")
cdef void free_after_use(self):
u"""Should the xmlNode* be freed when releasing the proxy?
"""
self._free_after_use = 1
@property
def tag(self):
"""Element tag
"""
self._assertNode()
if self._c_node.type == tree.XML_ELEMENT_NODE:
return _namespacedName(self._c_node)
elif self._c_node.type == tree.XML_PI_NODE:
return ProcessingInstruction
elif self._c_node.type == tree.XML_COMMENT_NODE:
return Comment
elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
return Entity
else:
self._raise_unsupported_type()
@property
def text(self):
"""Text before the first subelement. This is either a string or
the value None, if there was no text.
"""
self._assertNode()
if self._c_node.type == tree.XML_ELEMENT_NODE:
return _collectText(self._c_node.children)
elif self._c_node.type in (tree.XML_PI_NODE,
tree.XML_COMMENT_NODE):
if self._c_node.content is NULL:
return ''
else:
return funicode(self._c_node.content)
elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
return f'&{funicode(self._c_node.name)};'
else:
self._raise_unsupported_type()
@property
def tail(self):
"""Text after this element's end tag, but before the next sibling
element's start tag. This is either a string or the value None, if
there was no text.
"""
self._assertNode()
return _collectText(self._c_node.next)
@property
def sourceline(self):
"""Original line number as found by the parser or None if unknown.
"""
cdef long line
self._assertNode()
line = tree.xmlGetLineNo(self._c_node)
if line > 0:
return line
else:
return None
def __repr__(self):
self._assertNode()
if self._c_node.type == tree.XML_ELEMENT_NODE:
return "<Element %s at 0x%x>" % (strrepr(self.tag), id(self))
elif self._c_node.type == tree.XML_COMMENT_NODE:
return "<!--%s-->" % strrepr(self.text)
elif self._c_node.type == tree.XML_ENTITY_NODE:
return "&%s;" % strrepr(funicode(self._c_node.name))
elif self._c_node.type == tree.XML_PI_NODE:
text = self.text
if text:
return "<?%s %s?>" % (strrepr(self.target), text)
else:
return "<?%s?>" % strrepr(self.target)
else:
self._raise_unsupported_type()
def __getitem__(self, x):
u"""Returns the subelement at the given position or the requested
slice.
"""
cdef xmlNode* c_node = NULL
cdef Py_ssize_t step = 0, slicelength = 0
cdef Py_ssize_t c, i
cdef _node_to_node_function next_element
cdef list result
self._assertNode()
if isinstance(x, slice):
# slicing
if _isFullSlice(<slice>x):
return _collectChildren(self)
_findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
if c_node is NULL:
return []
if step > 0:
next_element = _nextElement
else:
step = -step
next_element = _previousElement
result = []
c = 0
while c_node is not NULL and c < slicelength:
result.append(_newReadOnlyProxy(self._source_proxy, c_node))
result.append(_elementFactory(self._doc, c_node))
c = c + 1
for i from 0 <= i < step:
c_node = next_element(c_node)
return result
else:
# indexing
c_node = _findChild(self._c_node, x)
if c_node is NULL:
raise IndexError, u"list index out of range"
return _newReadOnlyProxy(self._source_proxy, c_node)
def __len__(self):
u"""Returns the number of subelements.
"""
cdef Py_ssize_t c
cdef xmlNode* c_node
self._assertNode()
c = 0
c_node = self._c_node.children
while c_node is not NULL:
if tree._isElement(c_node):
c = c + 1
c_node = c_node.next
return c
def __nonzero__(self):
cdef xmlNode* c_node
self._assertNode()
c_node = _findChildBackwards(self._c_node, 0)
return c_node != NULL
def __deepcopy__(self, memo):
u"__deepcopy__(self, memo)"
return self.__copy__()
cpdef __copy__(self):
u"__copy__(self)"
cdef xmlDoc* c_doc
cdef xmlNode* c_node
cdef _Document new_doc
if self._c_node is NULL:
return self
c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
new_doc = _documentFactory(c_doc, None)
root = new_doc.getroot()
if root is not None:
return root
# Comment/PI
c_node = c_doc.children
while c_node is not NULL and c_node.type != self._c_node.type:
c_node = c_node.next
if c_node is NULL:
return None
return _elementFactory(new_doc, c_node)
def __iter__(self):
return iter(self.getchildren())
def iterchildren(self, tag=None, *, reversed=False):
u"""iterchildren(self, tag=None, reversed=False)
Iterate over the children of this element.
"""
children = self.getchildren()
if tag is not None and tag != '*':
children = [ el for el in children if el.tag == tag ]
if reversed:
children = children[::-1]
return iter(children)
cpdef getchildren(self):
u"""Returns all subelements. The elements are returned in document
order.
"""
cdef xmlNode* c_node
cdef list result
self._assertNode()
result = []
c_node = self._c_node.children
while c_node is not NULL:
if tree._isElement(c_node):
result.append(_newReadOnlyProxy(self._source_proxy, c_node))
c_node = c_node.next
return result
def getparent(self):
u"""Returns the parent of this element or None for the root element.
"""
cdef xmlNode* c_parent
self._assertNode()
c_parent = self._c_node.parent
if c_parent is NULL or not tree._isElement(c_parent):
return None
else:
return _newReadOnlyProxy(self._source_proxy, c_parent)
def getnext(self):
u"""Returns the following sibling of this element or None.
"""
cdef xmlNode* c_node
self._assertNode()
c_node = _nextElement(self._c_node)
if c_node is not NULL:
return _newReadOnlyProxy(self._source_proxy, c_node)
return None
def getprevious(self):
u"""Returns the preceding sibling of this element or None.
"""
cdef xmlNode* c_node
self._assertNode()
c_node = _previousElement(self._c_node)
if c_node is not NULL:
return _newReadOnlyProxy(self._source_proxy, c_node)
return None
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
"""A read-only proxy for processing instructions (for internal use only!)"""
@property
def target(self):
self._assertNode()
return funicode(self._c_node.name)
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
"""A read-only proxy for entity references (for internal use only!)"""
property name:
def __get__(self):
return funicode(self._c_node.name)
def __set__(self, value):
value_utf = _utf8(value)
if u'&' in value or u';' in value:
raise ValueError(f"Invalid entity name '{value}'")
tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
@property
def text(self):
return f'&{funicode(self._c_node.name)};'
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
"""The main read-only Element proxy class (for internal use only!)."""
@property
def attrib(self):
self._assertNode()
return dict(_collectAttributes(self._c_node, 3))
@property
def prefix(self):
"""Namespace prefix or None.
"""
self._assertNode()
if self._c_node.ns is not NULL:
if self._c_node.ns.prefix is not NULL:
return funicode(self._c_node.ns.prefix)
return None
@property
def nsmap(self):
"""Namespace prefix->URI mapping known in the context of this
Element. This includes all namespace declarations of the
parents.
Note that changing the returned dict has no effect on the Element.
"""
self._assertNode()
return _build_nsmap(self._c_node)
def get(self, key, default=None):
u"""Gets an element attribute.
"""
self._assertNode()
return _getNodeAttributeValue(self._c_node, key, default)
def keys(self):
u"""Gets a list of attribute names. The names are returned in an
arbitrary order (just like for an ordinary Python dictionary).
"""
self._assertNode()
return _collectAttributes(self._c_node, 1)
def values(self):
u"""Gets element attributes, as a sequence. The attributes are returned
in an arbitrary order.
"""
self._assertNode()
return _collectAttributes(self._c_node, 2)
def items(self):
u"""Gets element attributes, as a sequence. The attributes are returned
in an arbitrary order.
"""
self._assertNode()
return _collectAttributes(self._c_node, 3)
cdef _ReadOnlyProxy _newReadOnlyProxy(
_ReadOnlyProxy source_proxy, xmlNode* c_node):
cdef _ReadOnlyProxy el
if c_node.type == tree.XML_ELEMENT_NODE:
el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
elif c_node.type == tree.XML_PI_NODE:
el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
elif c_node.type in (tree.XML_COMMENT_NODE,
tree.XML_ENTITY_REF_NODE):
el = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
else:
raise TypeError(f"Unsupported element type: {c_node.type}")
el._c_node = c_node
_initReadOnlyProxy(el, source_proxy)
return el
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
_ReadOnlyProxy source_proxy):
if source_proxy is None:
el._source_proxy = el
el._dependent_proxies = [el]
else:
el._source_proxy = source_proxy
source_proxy._dependent_proxies.append(el)
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
cdef xmlNode* c_node
cdef _ReadOnlyProxy el
if sourceProxy is None:
return
if sourceProxy._dependent_proxies is None:
return
for el in sourceProxy._dependent_proxies:
c_node = el._c_node
el._c_node = NULL
if el._free_after_use:
tree.xmlFreeNode(c_node)
del sourceProxy._dependent_proxies[:]
# opaque wrapper around non-element nodes, e.g. the document node
#
# This class does not imply any restrictions on modifiability or
# read-only status of the node, so use with caution.
@cython.internal
cdef class _OpaqueNodeWrapper:
cdef tree.xmlNode* _c_node
def __init__(self):
raise TypeError, u"This type cannot be instantiated from Python"
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
cdef int _assertNode(self) except -1:
u"""This is our way of saying: this proxy is invalid!
"""
assert self._c_node is not NULL, u"Proxy invalidated!"
return 0
cpdef append(self, other_element):
u"""Append a copy of an Element to the list of children.
"""
cdef xmlNode* c_next
cdef xmlNode* c_node
self._assertNode()
c_node = _roNodeOf(other_element)
if c_node.type == tree.XML_ELEMENT_NODE:
if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
raise ValueError, u"cannot append, document already has a root element"
elif c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
raise TypeError, f"unsupported element type for top-level node: {c_node.type}"
c_node = _copyNodeToDoc(c_node, <tree.xmlDoc*>self._c_node)
c_next = c_node.next
tree.xmlAddChild(self._c_node, c_node)
_moveTail(c_next, c_node)
def extend(self, elements):
u"""Append a copy of all Elements from a sequence to the list of
children.
"""
self._assertNode()
for element in elements:
self.append(element)
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
cdef _OpaqueNodeWrapper node
if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
node = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
else:
node = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
node._c_node = c_node
return node
# element proxies that allow restricted modification
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
u"""A read-only proxy that allows changing the text content.
"""
property text:
def __get__(self):
self._assertNode()
if self._c_node.content is NULL:
return ''
else:
return funicode(self._c_node.content)
def __set__(self, value):
cdef tree.xmlDict* c_dict
self._assertNode()
if value is None:
c_text = <const_xmlChar*>NULL
else:
value = _utf8(value)
c_text = _xcstr(value)
tree.xmlNodeSetContent(self._c_node, c_text)
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
"""A read-only proxy that allows changing the text/target content of a
processing instruction.
"""
property target:
def __get__(self):
self._assertNode()
return funicode(self._c_node.name)
def __set__(self, value):
self._assertNode()
value = _utf8(value)
c_text = _xcstr(value)
tree.xmlNodeSetName(self._c_node, c_text)
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
"A read-only proxy for entity references (for internal use only!)"
property name:
def __get__(self):
return funicode(self._c_node.name)
def __set__(self, value):
value = _utf8(value)
assert u'&' not in value and u';' not in value, \
f"Invalid entity name '{value}'"
c_text = _xcstr(value)
tree.xmlNodeSetName(self._c_node, c_text)
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
u"""A read-only element that allows adding children and changing the
text content (i.e. everything that adds to the subtree).
"""
cpdef append(self, other_element):
u"""Append a copy of an Element to the list of children.
"""
cdef xmlNode* c_next
cdef xmlNode* c_node
self._assertNode()
c_node = _roNodeOf(other_element)
c_node = _copyNodeToDoc(c_node, self._c_node.doc)
c_next = c_node.next
tree.xmlAddChild(self._c_node, c_node)
_moveTail(c_next, c_node)
def extend(self, elements):
u"""Append a copy of all Elements from a sequence to the list of
children.
"""
self._assertNode()
for element in elements:
self.append(element)
property text:
"""Text before the first subelement. This is either a string or the
value None, if there was no text.
"""
def __get__(self):
self._assertNode()
return _collectText(self._c_node.children)
def __set__(self, value):
self._assertNode()
if isinstance(value, QName):
value = _resolveQNameText(self, value).decode('utf8')
_setNodeText(self._c_node, value)
cdef _ReadOnlyProxy _newAppendOnlyProxy(
_ReadOnlyProxy source_proxy, xmlNode* c_node):
cdef _ReadOnlyProxy el
if c_node.type == tree.XML_ELEMENT_NODE:
el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
elif c_node.type == tree.XML_PI_NODE:
el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
elif c_node.type == tree.XML_COMMENT_NODE:
el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
else:
raise TypeError(f"Unsupported element type: {c_node.type}")
el._c_node = c_node
_initReadOnlyProxy(el, source_proxy)
return el
cdef xmlNode* _roNodeOf(element) except NULL:
cdef xmlNode* c_node
if isinstance(element, _Element):
c_node = (<_Element>element)._c_node
elif isinstance(element, _ReadOnlyProxy):
c_node = (<_ReadOnlyProxy>element)._c_node
elif isinstance(element, _OpaqueNodeWrapper):
c_node = (<_OpaqueNodeWrapper>element)._c_node
else:
raise TypeError, f"invalid argument type {type(element)}"
if c_node is NULL:
raise TypeError, u"invalid element"
return c_node
cdef xmlNode* _nonRoNodeOf(element) except NULL:
cdef xmlNode* c_node
if isinstance(element, _Element):
c_node = (<_Element>element)._c_node
elif isinstance(element, _AppendOnlyElementProxy):
c_node = (<_AppendOnlyElementProxy>element)._c_node
elif isinstance(element, _OpaqueNodeWrapper):
c_node = (<_OpaqueNodeWrapper>element)._c_node
else:
raise TypeError, f"invalid argument type {type(element)}"
if c_node is NULL:
raise TypeError, u"invalid element"
return c_node