Commit d1ab2651 authored by Eli Bendersky

Replace the iter/itertext methods of Element in _elementtree with true C...

Replace the iter/itertext methods of Element in _elementtree with true C implementations, instead of the bootstrapped Python code. In addition to being cleaner (removing the last remains of the bootstrapping code in _elementtree), this gives a 10x performance boost for iter() on large documents.
Also reorganized the tests a bit to be more robust.
parent d4fdbef3
...@@ -23,7 +23,8 @@ import weakref ...@@ -23,7 +23,8 @@ import weakref
from test import support from test import support
from test.support import findfile, import_fresh_module, gc_collect from test.support import findfile, import_fresh_module, gc_collect
pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree']) pyET = None
ET = None
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try: try:
...@@ -209,10 +210,8 @@ def interface(): ...@@ -209,10 +210,8 @@ def interface():
These methods return an iterable. See bug 6472. These methods return an iterable. See bug 6472.
>>> check_method(element.iter("tag").__next__)
>>> check_method(element.iterfind("tag").__next__) >>> check_method(element.iterfind("tag").__next__)
>>> check_method(element.iterfind("*").__next__) >>> check_method(element.iterfind("*").__next__)
>>> check_method(tree.iter("tag").__next__)
>>> check_method(tree.iterfind("tag").__next__) >>> check_method(tree.iterfind("tag").__next__)
>>> check_method(tree.iterfind("*").__next__) >>> check_method(tree.iterfind("*").__next__)
...@@ -291,42 +290,6 @@ def cdata(): ...@@ -291,42 +290,6 @@ def cdata():
'<tag>hello</tag>' '<tag>hello</tag>'
""" """
# Only with Python implementation
def simplefind():
"""
Test find methods using the elementpath fallback.
>>> ElementTree = pyET
>>> CurrentElementPath = ElementTree.ElementPath
>>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
>>> elem = ElementTree.XML(SAMPLE_XML)
>>> elem.find("tag").tag
'tag'
>>> ElementTree.ElementTree(elem).find("tag").tag
'tag'
>>> elem.findtext("tag")
'text'
>>> elem.findtext("tog")
>>> elem.findtext("tog", "default")
'default'
>>> ElementTree.ElementTree(elem).findtext("tag")
'text'
>>> summarize_list(elem.findall("tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag']
Path syntax doesn't work in this case.
>>> elem.find("section/tag")
>>> elem.findtext("section/tag")
>>> summarize_list(elem.findall("section/tag"))
[]
>>> ElementTree.ElementPath = CurrentElementPath
"""
def find(): def find():
""" """
Test find methods (including xpath syntax). Test find methods (including xpath syntax).
...@@ -1002,36 +965,6 @@ def methods(): ...@@ -1002,36 +965,6 @@ def methods():
'1 < 2\n' '1 < 2\n'
""" """
def iterators():
"""
Test iterators.
>>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
>>> summarize_list(e.iter())
['html', 'body', 'i']
>>> summarize_list(e.find("body").iter())
['body', 'i']
>>> summarize(next(e.iter()))
'html'
>>> "".join(e.itertext())
'this is a paragraph...'
>>> "".join(e.find("body").itertext())
'this is a paragraph.'
>>> next(e.itertext())
'this is a '
Method iterparse should return an iterator. See bug 6472.
>>> sourcefile = serialize(e, to_string=False)
>>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
('end', <Element 'i' at 0x...>)
>>> tree = ET.ElementTree(None)
>>> tree.iter()
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'iter'
"""
ENTITY_XML = """\ ENTITY_XML = """\
<!DOCTYPE points [ <!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'> <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
...@@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\ ...@@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\
</document> </document>
""".format(html.escape(SIMPLE_XMLFILE, True)) """.format(html.escape(SIMPLE_XMLFILE, True))
def xinclude_loader(href, parse="xml", encoding=None): def xinclude_loader(href, parse="xml", encoding=None):
try: try:
data = XINCLUDE[href] data = XINCLUDE[href]
...@@ -1411,22 +1345,6 @@ def xinclude(): ...@@ -1411,22 +1345,6 @@ def xinclude():
>>> # print(serialize(document)) # C5 >>> # print(serialize(document)) # C5
""" """
def xinclude_default():
"""
>>> from xml.etree import ElementInclude
>>> document = xinclude_loader("default.xml")
>>> ElementInclude.include(document)
>>> print(serialize(document)) # default
<document>
<p>Example.</p>
<root>
<element key="value">text</element>
<element>text</element>tail
<empty-element />
</root>
</document>
"""
# #
# badly formatted xi:include tags # badly formatted xi:include tags
...@@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase): ...@@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase):
self.assertIsInstance(ET.QName, type) self.assertIsInstance(ET.QName, type)
self.assertIsInstance(ET.ElementTree, type) self.assertIsInstance(ET.ElementTree, type)
self.assertIsInstance(ET.Element, type) self.assertIsInstance(ET.Element, type)
# XXX issue 14128 with C ElementTree self.assertIsInstance(ET.TreeBuilder, type)
# self.assertIsInstance(ET.TreeBuilder, type) self.assertIsInstance(ET.XMLParser, type)
# self.assertIsInstance(ET.XMLParser, type)
def test_Element_subclass_trivial(self): def test_Element_subclass_trivial(self):
class MyElement(ET.Element): class MyElement(ET.Element):
...@@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase): ...@@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase):
self.assertEqual(mye.newmethod(), 'joe') self.assertEqual(mye.newmethod(), 'joe')
class ElementIterTest(unittest.TestCase):
    """Checks for Element.iter(), Element.itertext() and related iterators."""

    def _ilist(self, elem, tag=None):
        """Summarize the elements produced by ``elem.iter(tag)``."""
        found = elem.iter(tag)
        return summarize_list(found)

    def test_basic(self):
        """Iterate a small HTML-like document by element and by text."""
        markup = "<html><body>this is a <i>paragraph</i>.</body>..</html>"
        doc = ET.XML(markup)

        # Element iteration: whole document, a subtree, and the first item.
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
        first = next(doc.iter())
        self.assertEqual(first.tag, 'html')

        # Text iteration: whole document, a subtree, and the first chunk.
        whole_text = ''.join(doc.itertext())
        self.assertEqual(whole_text, 'this is a paragraph...')
        body_text = ''.join(doc.find('body').itertext())
        self.assertEqual(body_text,
                         'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(sourcefile))
        self.assertEqual(first_event[0], 'end')

        # A tree without a root element cannot be iterated.
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

    def test_corners(self):
        """Grow a tree step by step and check iteration order each time."""
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        child = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(child, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        """Filter iteration by tag, including the None and '*' wildcards."""
        doc = ET.XML('''
<document>
<house>
<room>bedroom1</room>
<room>bedroom2</room>
</house>
<shed>nothing here
</shed>
<house>
<room>bedroom8</room>
</house>
</document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)
class TreeBuilderTest(unittest.TestCase): class TreeBuilderTest(unittest.TestCase):
sample1 = ('<!DOCTYPE html PUBLIC' sample1 = ('<!DOCTYPE html PUBLIC'
' "-//W3C//DTD XHTML 1.0 Transitional//EN"' ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
...@@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase): ...@@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase):
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
class XincludeTest(unittest.TestCase):
    """Checks for xml.etree.ElementInclude processing."""

    def test_xinclude_default(self):
        """Expand the 'default.xml' sample and compare its serialization."""
        from xml.etree import ElementInclude

        expected = '''<document>
<p>Example.</p>
<root>
<element key="value">text</element>
<element>text</element>tail
<empty-element />
</root>
</document>'''

        doc = xinclude_loader('default.xml')
        ElementInclude.include(doc)
        serialized = serialize(doc)
        self.assertEqual(serialized.strip(), expected)
class XMLParserTest(unittest.TestCase): class XMLParserTest(unittest.TestCase):
sample1 = '<file><line>22</line></file>' sample1 = '<file><line>22</line></file>'
sample2 = ('<!DOCTYPE html PUBLIC' sample2 = ('<!DOCTYPE html PUBLIC'
...@@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase): ...@@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase):
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
class NoAcceleratorTest(unittest.TestCase):
# Test that the C accelerator was not imported for pyET
def test_correct_import_pyET(self):
self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
class NamespaceParseTest(unittest.TestCase): class NamespaceParseTest(unittest.TestCase):
def test_find_with_namespace(self): def test_find_with_namespace(self):
nsmap = {'h': 'hello', 'f': 'foo'} nsmap = {'h': 'hello', 'f': 'foo'}
...@@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase): ...@@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase):
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
class ElementSlicingTest(unittest.TestCase): class ElementSlicingTest(unittest.TestCase):
def _elem_tags(self, elemlist): def _elem_tags(self, elemlist):
return [e.tag for e in elemlist] return [e.tag for e in elemlist]
...@@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase): ...@@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase):
with self.assertRaisesRegex(TypeError, 'must be dict, not str'): with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
ET.Element('a', attrib="I'm not a dict") ET.Element('a', attrib="I'm not a dict")
# --------------------------------------------------------------------
class NoAcceleratorTest(unittest.TestCase):
    """Check that pyET is the pure-Python ElementTree implementation."""

    def setUp(self):
        # pyET is still None while this module is being imported; it is only
        # filled in later by test_main().  A class-level
        # @unittest.skipUnless(pyET, ...) is evaluated at class-definition
        # time, so it would always see None and skip this test
        # unconditionally.  Make the decision at run time instead.
        if not pyET:
            self.skipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
        self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
class ElementPathFallbackTest(unittest.TestCase):
    """Test the find methods using the _SimpleElementPath fallback."""

    def test_fallback(self):
        current_ElementPath = ET.ElementPath
        ET.ElementPath = ET._SimpleElementPath()
        # Restore the real ElementPath even when an assertion fails;
        # otherwise the fallback would stay installed on the module and
        # affect every test that runs afterwards.
        try:
            elem = ET.XML(SAMPLE_XML)
            self.assertEqual(elem.find('tag').tag, 'tag')
            self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
            self.assertEqual(elem.findtext('tag'), 'text')
            self.assertIsNone(elem.findtext('tog'))
            self.assertEqual(elem.findtext('tog', 'default'), 'default')
            self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
            self.assertEqual(summarize_list(elem.findall('tag')),
                             ['tag', 'tag'])
            self.assertEqual(summarize_list(elem.findall('.//tag')),
                             ['tag', 'tag', 'tag'])

            # Path syntax doesn't work in this case.
            self.assertIsNone(elem.find('section/tag'))
            self.assertIsNone(elem.findtext('section/tag'))
            self.assertEqual(summarize_list(elem.findall('section/tag')), [])
        finally:
            ET.ElementPath = current_ElementPath
# -------------------------------------------------------------------- # --------------------------------------------------------------------
...@@ -2276,31 +2295,43 @@ class CleanContext(object): ...@@ -2276,31 +2295,43 @@ class CleanContext(object):
self.checkwarnings.__exit__(*args) self.checkwarnings.__exit__(*args)
def test_main(module=pyET): def test_main(module=None):
from test import test_xml_etree # When invoked without a module, runs the Python ET tests by loading pyET.
# Otherwise, uses the given module as the ET.
if module is None:
global pyET
pyET = import_fresh_module('xml.etree.ElementTree',
blocked=['_elementtree'])
module = pyET
# The same doctests are used for both the Python and the C implementations global ET
test_xml_etree.ET = module ET = module
test_classes = [ test_classes = [
ElementSlicingTest, ElementSlicingTest,
BasicElementTest, BasicElementTest,
StringIOTest, StringIOTest,
ParseErrorTest, ParseErrorTest,
XincludeTest,
ElementTreeTest, ElementTreeTest,
NamespaceParseTest, ElementIterTest,
TreeBuilderTest, TreeBuilderTest,
XMLParserTest, ]
KeywordArgsTest]
if module is pyET: # These tests will only run for the pure-Python version that doesn't import
# Run the tests specific to the Python implementation # _elementtree. We can't use skipUnless here, because pyET is filled in only
test_classes += [NoAcceleratorTest] # after the module is loaded.
if pyET:
test_classes.extend([
NoAcceleratorTest,
ElementPathFallbackTest,
])
support.run_unittest(*test_classes) support.run_unittest(*test_classes)
# XXX the C module should give the same warnings as the Python module # XXX the C module should give the same warnings as the Python module
with CleanContext(quiet=(module is not pyET)): with CleanContext(quiet=(module is not pyET)):
support.run_doctest(test_xml_etree, verbosity=True) support.run_doctest(sys.modules[__name__], verbosity=True)
if __name__ == '__main__': if __name__ == '__main__':
test_main() test_main()
...@@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree']) ...@@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree'])
cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree']) cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
# cElementTree specific tests
def sanity():
r"""
Import sanity.
Issue #6697.
>>> cElementTree = cET
>>> e = cElementTree.Element('a')
>>> getattr(e, '\uD800') # doctest: +ELLIPSIS
Traceback (most recent call last):
...
UnicodeEncodeError: ...
>>> p = cElementTree.XMLParser()
>>> p.version.split()[0]
'Expat'
>>> getattr(p, '\uD800')
Traceback (most recent call last):
...
AttributeError: 'XMLParser' object has no attribute '\ud800'
"""
class MiscTests(unittest.TestCase): class MiscTests(unittest.TestCase):
# Issue #8651. # Issue #8651.
@support.bigmemtest(size=support._2G + 100, memuse=1) @support.bigmemtest(size=support._2G + 100, memuse=1)
...@@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase): ...@@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase):
finally: finally:
data = None data = None
@unittest.skipUnless(cET, 'requires _elementtree') @unittest.skipUnless(cET, 'requires _elementtree')
class TestAliasWorking(unittest.TestCase): class TestAliasWorking(unittest.TestCase):
# Test that the cET alias module is alive # Test that the cET alias module is alive
...@@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase): ...@@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase):
e = cET_alias.Element('foo') e = cET_alias.Element('foo')
self.assertEqual(e.tag, 'foo') self.assertEqual(e.tag, 'foo')
@unittest.skipUnless(cET, 'requires _elementtree') @unittest.skipUnless(cET, 'requires _elementtree')
class TestAcceleratorImported(unittest.TestCase): class TestAcceleratorImported(unittest.TestCase):
# Test that the C accelerator was imported, as expected # Test that the C accelerator was imported, as expected
...@@ -67,7 +44,6 @@ def test_main(): ...@@ -67,7 +44,6 @@ def test_main():
from test import test_xml_etree, test_xml_etree_c from test import test_xml_etree, test_xml_etree_c
# Run the tests specific to the C implementation # Run the tests specific to the C implementation
support.run_doctest(test_xml_etree_c, verbosity=True)
support.run_unittest( support.run_unittest(
MiscTests, MiscTests,
TestAliasWorking, TestAliasWorking,
......
...@@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None): ...@@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None):
_raise_serialization_error(qname) _raise_serialization_error(qname)
# populate qname and namespaces table # populate qname and namespaces table
try: for elem in elem.iter():
iterate = elem.iter
except AttributeError:
iterate = elem.getiterator # cET compatibility
for elem in iterate():
tag = elem.tag tag = elem.tag
if isinstance(tag, QName): if isinstance(tag, QName):
if tag.text not in qnames: if tag.text not in qnames:
......
...@@ -103,8 +103,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) ...@@ -103,8 +103,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
/* glue functions (see the init function for details) */ /* glue functions (see the init function for details) */
static PyObject* elementtree_parseerror_obj; static PyObject* elementtree_parseerror_obj;
static PyObject* elementtree_deepcopy_obj; static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_iter_obj;
static PyObject* elementtree_itertext_obj;
static PyObject* elementpath_obj; static PyObject* elementpath_obj;
/* helpers */ /* helpers */
...@@ -1109,67 +1107,32 @@ element_getchildren(ElementObject* self, PyObject* args) ...@@ -1109,67 +1107,32 @@ element_getchildren(ElementObject* self, PyObject* args)
return list; return list;
} }
static PyObject*
element_iter(ElementObject* self, PyObject* args)
{
PyObject* result;
PyObject* tag = Py_None; static PyObject *
if (!PyArg_ParseTuple(args, "|O:iter", &tag)) create_elementiter(ElementObject *self, PyObject *tag, int gettext);
return NULL;
if (!elementtree_iter_obj) {
PyErr_SetString(
PyExc_RuntimeError,
"iter helper not found"
);
return NULL;
}
args = PyTuple_New(2); static PyObject *
if (!args) element_iter(ElementObject *self, PyObject *args)
{
PyObject* tag = Py_None;
if (!PyArg_ParseTuple(args, "|O:iter", &tag))
return NULL; return NULL;
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self); return create_elementiter(self, tag, 0);
Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
result = PyObject_CallObject(elementtree_iter_obj, args);
Py_DECREF(args);
return result;
} }
static PyObject* static PyObject*
element_itertext(ElementObject* self, PyObject* args) element_itertext(ElementObject* self, PyObject* args)
{ {
PyObject* result;
if (!PyArg_ParseTuple(args, ":itertext")) if (!PyArg_ParseTuple(args, ":itertext"))
return NULL; return NULL;
if (!elementtree_itertext_obj) { return create_elementiter(self, Py_None, 1);
PyErr_SetString(
PyExc_RuntimeError,
"itertext helper not found"
);
return NULL;
}
args = PyTuple_New(1);
if (!args)
return NULL;
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
result = PyObject_CallObject(elementtree_itertext_obj, args);
Py_DECREF(args);
return result;
} }
static PyObject* static PyObject*
element_getitem(PyObject* self_, Py_ssize_t index) element_getitem(PyObject* self_, Py_ssize_t index)
{ {
...@@ -1790,6 +1753,267 @@ static PyTypeObject Element_Type = { ...@@ -1790,6 +1753,267 @@ static PyTypeObject Element_Type = {
0, /* tp_free */ 0, /* tp_free */
}; };
/******************************* Element iterator ****************************/
/* ElementIterObject represents the iteration state over an XML element in
* pre-order traversal. To keep track of which sub-element should be returned
* next, a stack of parents is maintained. This is a standard stack-based
* iterative pre-order traversal of a tree.
* The stack is managed using a singly-linked list starting at parent_stack.
* Each stack node contains the saved parent to which we should return after
* the current one is exhausted, and the next child to examine in that parent.
*/
/* One node of the iterator's parent stack. */
typedef struct ParentLocator_t {
/* Element whose children are being walked.  parent_stack_push_new() takes
 * a reference to it; it is NULL only in the bottom-of-stack sentinel node
 * set up by create_elementiter(). */
ElementObject *parent;
/* Index of the next child of `parent` to visit. */
Py_ssize_t child_index;
/* Node below this one on the stack (NULL for the sentinel). */
struct ParentLocator_t *next;
} ParentLocator;
/* Iteration state shared by Element.iter() and Element.itertext(). */
typedef struct {
PyObject_HEAD
/* Top of the parent stack; the bottom node is a sentinel whose parent
 * is NULL. */
ParentLocator *parent_stack;
/* Element the iteration started from (owned reference). */
ElementObject *root_element;
/* Tag to filter on (owned reference); Py_None means "yield everything". */
PyObject *sought_tag;
/* Set to 1 once the root element has been pushed and processed. */
int root_done;
/* Non-zero when yielding text/tail objects (itertext) instead of elements. */
int gettext;
} ElementIterObject;
/* Destructor for ElementIterObject: releases the parent stack and every
 * reference the iterator owns, then frees the object itself.
 */
static void
elementiter_dealloc(ElementIterObject *it)
{
    ParentLocator *node;

    /* A GC-enabled object must be untracked *before* its fields are torn
     * down: Py_XDECREF may run arbitrary code, which can trigger a
     * collection that would otherwise traverse this half-destroyed object.
     */
    PyObject_GC_UnTrack(it);

    /* Detach the stack first so the iterator never points at freed nodes. */
    node = it->parent_stack;
    it->parent_stack = NULL;
    while (node) {
        ParentLocator *doomed = node;
        node = node->next;
        Py_XDECREF(doomed->parent);
        PyObject_Free(doomed);
    }

    Py_XDECREF(it->sought_tag);
    Py_XDECREF(it->root_element);

    PyObject_GC_Del(it);
}
/* GC traversal hook: reports every object the iterator keeps alive, i.e.
 * each parent saved on the stack, the root element and the sought tag.
 */
static int
elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
{
    ParentLocator *node;

    for (node = it->parent_stack; node != NULL; node = node->next) {
        Py_VISIT(node->parent);
    }

    Py_VISIT(it->root_element);
    Py_VISIT(it->sought_tag);
    return 0;
}
/* Helper for elementiter_next: allocate a stack node for `parent` and link
 * it on top of `stack`.  The node takes its own reference to `parent` and
 * starts at child index 0.  Returns the new top of the stack, or NULL when
 * the allocation fails (no exception is set and `stack` is left untouched).
 */
static ParentLocator *
parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
{
    ParentLocator *top = PyObject_Malloc(sizeof(ParentLocator));

    if (top == NULL)
        return NULL;

    Py_INCREF(parent);
    top->parent = parent;
    top->child_index = 0;
    top->next = stack;
    return top;
}
/* tp_iternext for ElementIterObject.
 *
 * Returns a new reference to the next element (iter) or text/tail object
 * (itertext) in pre-order, or NULL with an exception set: StopIteration
 * when the traversal is exhausted, MemoryError when a stack node cannot be
 * allocated, or whatever a tag comparison / truth test raised.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    while (1) {
        ElementObject *cur_parent;
        Py_ssize_t child_index;

        /* Handle the case reached in the beginning and end of iteration,
         * where the parent stack is empty (only the sentinel is left). The
         * root_done flag tells whether we've just started iterating (so
         * root_done is 0), in which case the root is processed. If root_done
         * is 1 and we're here, the iterator is exhausted.
         */
        if (!it->parent_stack->parent) {
            ParentLocator *root_node;
            int matched = 1;

            if (it->root_done) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            /* Push through a temporary: on failure the existing stack must
             * stay reachable from `it`, otherwise it would leak and the next
             * call would dereference a NULL stack pointer.
             */
            root_node = parent_stack_push_new(it->parent_stack,
                                              it->root_element);
            if (!root_node) {
                PyErr_NoMemory();
                return NULL;
            }
            it->parent_stack = root_node;
            it->root_done = 1;

            if (it->sought_tag != Py_None) {
                matched = PyObject_RichCompareBool(it->root_element->tag,
                                                   it->sought_tag, Py_EQ);
                if (matched < 0)
                    return NULL;    /* the comparison raised */
            }
            if (matched) {
                if (it->gettext) {
                    PyObject *text = JOIN_OBJ(it->root_element->text);
                    int has_text = PyObject_IsTrue(text);
                    if (has_text < 0)
                        return NULL;
                    if (has_text) {
                        Py_INCREF(text);
                        return text;
                    }
                }
                else {
                    Py_INCREF(it->root_element);
                    return (PyObject *)it->root_element;
                }
            }
        }

        /* See if there are children left to traverse in the current parent.
         * If yes, visit the next child. If not, pop the stack and try again.
         */
        cur_parent = it->parent_stack->parent;
        child_index = it->parent_stack->child_index;
        if (cur_parent->extra && child_index < cur_parent->extra->length) {
            ElementObject *child = (ElementObject *)
                cur_parent->extra->children[child_index];
            ParentLocator *child_node;

            it->parent_stack->child_index++;
            child_node = parent_stack_push_new(it->parent_stack, child);
            if (!child_node) {
                PyErr_NoMemory();
                return NULL;
            }
            it->parent_stack = child_node;

            if (it->gettext) {
                PyObject *text = JOIN_OBJ(child->text);
                int has_text = PyObject_IsTrue(text);
                if (has_text < 0)
                    return NULL;
                if (has_text) {
                    Py_INCREF(text);
                    return text;
                }
            }
            else {
                int matched = 1;
                if (it->sought_tag != Py_None) {
                    matched = PyObject_RichCompareBool(child->tag,
                                                       it->sought_tag, Py_EQ);
                    if (matched < 0)
                        return NULL;
                }
                if (matched) {
                    Py_INCREF(child);
                    return (PyObject *)child;
                }
            }
        }
        else {
            PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
            ParentLocator *finished = it->parent_stack;
            int has_tail = 0;

            /* Own the tail before popping: dropping the stack's reference
             * to cur_parent may free the element, and a merely borrowed
             * tail along with it.
             */
            Py_XINCREF(tail);
            it->parent_stack = finished->next;
            Py_XDECREF(finished->parent);
            PyObject_Free(finished);

            /* Note the extra condition on it->parent_stack->parent here;
             * this is because itertext() is supposed to only return *inner*
             * text, not text following the element it began iteration with.
             */
            if (it->parent_stack->parent)
                has_tail = PyObject_IsTrue(tail);
            if (has_tail > 0)
                return tail;        /* hand our reference to the caller */
            Py_XDECREF(tail);
            if (has_tail < 0)
                return NULL;
        }
    }

    return NULL;
}
/* Type object for the iterator behind Element.iter() and Element.itertext().
 * Instances are allocated by create_elementiter(). The type takes part in
 * cyclic GC (Py_TPFLAGS_HAVE_GC with elementiter_traverse), is its own
 * iterator (PyObject_SelfIter) and produces items via elementiter_next.
 */
static PyTypeObject ElementIter_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"_elementtree._element_iterator", /* tp_name */
sizeof(ElementIterObject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)elementiter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)elementiter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)elementiter_next, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
/* Create the iterator object used by Element.iter() and Element.itertext().
 *
 *   self    - element the traversal starts from
 *   tag     - tag to filter on; Py_None or the string/bytes "*" yields all
 *   gettext - non-zero to yield text/tail objects instead of elements
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext)
{
    ElementIterObject *it;
    PyObject *star = NULL;

    /* Normalise the "*" wildcard to Py_None before allocating anything, so
     * the failure paths below have nothing to clean up.
     */
    if (PyUnicode_Check(tag)) {
        star = PyUnicode_FromString("*");
        if (!star)
            return NULL;
    }
    else if (PyBytes_Check(tag)) {
        star = PyBytes_FromString("*");
        if (!star)
            return NULL;
    }
    if (star) {
        int is_star = PyObject_RichCompareBool(tag, star, Py_EQ);
        Py_DECREF(star);
        if (is_star < 0)
            return NULL;            /* the comparison raised */
        if (is_star)
            tag = Py_None;
    }

    it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
    if (!it)
        return NULL;

    /* Bottom-of-stack sentinel: a node whose parent is NULL. */
    it->parent_stack = PyObject_Malloc(sizeof(ParentLocator));
    if (!it->parent_stack) {
        /* The object is not tracked and its other fields are still
         * uninitialised, so free it directly instead of running the
         * destructor. PyObject_Malloc does not set an exception itself.
         */
        PyObject_GC_Del(it);
        return PyErr_NoMemory();
    }
    it->parent_stack->parent = NULL;
    it->parent_stack->child_index = 0;
    it->parent_stack->next = NULL;

    Py_INCREF(tag);
    it->sought_tag = tag;
    it->root_done = 0;
    it->gettext = gettext;
    Py_INCREF(self);
    it->root_element = self;

    PyObject_GC_Track(it);
    return (PyObject *)it;
}
/* ==================================================================== */ /* ==================================================================== */
/* the tree builder type */ /* the tree builder type */
...@@ -3238,8 +3462,7 @@ static struct PyModuleDef _elementtreemodule = { ...@@ -3238,8 +3462,7 @@ static struct PyModuleDef _elementtreemodule = {
PyMODINIT_FUNC PyMODINIT_FUNC
PyInit__elementtree(void) PyInit__elementtree(void)
{ {
PyObject *m, *g, *temp; PyObject *m, *temp;
char* bootstrap;
/* Initialize object types */ /* Initialize object types */
if (PyType_Ready(&TreeBuilder_Type) < 0) if (PyType_Ready(&TreeBuilder_Type) < 0)
...@@ -3255,44 +3478,6 @@ PyInit__elementtree(void) ...@@ -3255,44 +3478,6 @@ PyInit__elementtree(void)
if (!m) if (!m)
return NULL; return NULL;
/* The code below requires that the module gets already added
to sys.modules. */
PyDict_SetItemString(PyImport_GetModuleDict(),
_elementtreemodule.m_name,
m);
/* python glue code */
g = PyDict_New();
if (!g)
return NULL;
PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
bootstrap = (
"def iter(node, tag=None):\n" /* helper */
" if tag == '*':\n"
" tag = None\n"
" if tag is None or node.tag == tag:\n"
" yield node\n"
" for node in node:\n"
" for node in iter(node, tag):\n"
" yield node\n"
"def itertext(node):\n" /* helper */
" if node.text:\n"
" yield node.text\n"
" for e in node:\n"
" for s in e.itertext():\n"
" yield s\n"
" if e.tail:\n"
" yield e.tail\n"
);
if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
return NULL;
if (!(temp = PyImport_ImportModule("copy"))) if (!(temp = PyImport_ImportModule("copy")))
return NULL; return NULL;
elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
...@@ -3301,9 +3486,6 @@ PyInit__elementtree(void) ...@@ -3301,9 +3486,6 @@ PyInit__elementtree(void)
if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
return NULL; return NULL;
elementtree_iter_obj = PyDict_GetItemString(g, "iter");
elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
/* link against pyexpat */ /* link against pyexpat */
expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
if (expat_capi) { if (expat_capi) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment