Commit 91d5193b authored by Florent Xicluna

Closes #2892: preserve iterparse events in case of SyntaxError.

parent c1e73c30
......@@ -754,6 +754,7 @@ def iterparse():
... print(action, elem.tag)
... except ET.ParseError as v:
... print(v)
end document
junk after document element: line 1, column 12
"""
......
......@@ -1250,6 +1250,7 @@ class _IterParseIterator:
self._close_file = close_source
self._events = []
self._index = 0
self._error = None
self.root = self._root = None
self._parser = parser
# wire up the parser for event reporting
......@@ -1291,24 +1292,31 @@ class _IterParseIterator:
while 1:
try:
item = self._events[self._index]
self._index += 1
return item
except IndexError:
if self._parser is None:
self.root = self._root
if self._close_file:
self._file.close()
raise StopIteration
# load event buffer
del self._events[:]
self._index = 0
data = self._file.read(16384)
if data:
pass
if self._error:
e = self._error
self._error = None
raise e
if self._parser is None:
self.root = self._root
if self._close_file:
self._file.close()
raise StopIteration
# load event buffer
del self._events[:]
self._index = 0
data = self._file.read(16384)
if data:
try:
self._parser.feed(data)
else:
self._root = self._parser.close()
self._parser = None
except SyntaxError as exc:
self._error = exc
else:
self._index = self._index + 1
return item
self._root = self._parser.close()
self._parser = None
def __iter__(self):
return self
......
......@@ -66,6 +66,8 @@ Core and Builtins
Library
-------
- Issue #2892: preserve iterparse events in case of SyntaxError.
- Issue #670664: Fix HTMLParser to correctly handle the content of
``<script>...</script>`` and ``<style>...</style>``.
......
......@@ -3000,6 +3000,7 @@ PyInit__elementtree(void)
" self._file = file\n"
" self._events = []\n"
" self._index = 0\n"
" self._error = None\n"
" self.root = self._root = None\n"
" b = cElementTree.TreeBuilder()\n"
" self._parser = cElementTree.XMLParser(b)\n"
......@@ -3008,24 +3009,31 @@ PyInit__elementtree(void)
" while 1:\n"
" try:\n"
" item = self._events[self._index]\n"
" self._index += 1\n"
" return item\n"
" except IndexError:\n"
" if self._parser is None:\n"
" self.root = self._root\n"
" if self._close_file:\n"
" self._file.close()\n"
" raise StopIteration\n"
" # load event buffer\n"
" del self._events[:]\n"
" self._index = 0\n"
" data = self._file.read(16384)\n"
" if data:\n"
" pass\n"
" if self._error:\n"
" e = self._error\n"
" self._error = None\n"
" raise e\n"
" if self._parser is None:\n"
" self.root = self._root\n"
" if self._close_file:\n"
" self._file.close()\n"
" raise StopIteration\n"
" # load event buffer\n"
" del self._events[:]\n"
" self._index = 0\n"
" data = self._file.read(16384)\n"
" if data:\n"
" try:\n"
" self._parser.feed(data)\n"
" else:\n"
" self._root = self._parser.close()\n"
" self._parser = None\n"
" except SyntaxError as exc:\n"
" self._error = exc\n"
" else:\n"
" self._index = self._index + 1\n"
" return item\n"
" self._root = self._parser.close()\n"
" self._parser = None\n"
" def __iter__(self):\n"
" return self\n"
"cElementTree.iterparse = iterparse\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment