Commit 12354a76 authored by Vincent Pelletier

ERP5Type: Add XMLUtils.

parent 27f4b03d
from xml.etree.ElementTree import iterparse, Element, TreeBuilder, XMLParser
from .XMLMatrix import INFINITE_SET
# Element subclass used for the elements parseStream builds; it adds nothing
# to xml.etree.ElementTree.Element except the security marker below.
class RestrictedElement(Element):
    # Zope AccessControl marker. Presumably this lets restricted code reach the
    # element's attributes and methods without per-attribute security
    # declarations - TODO confirm against the AccessControl documentation.
    __allow_access_to_unprotected_subobjects__ = 1
class ChildDiscardingElement(RestrictedElement):
    """
    Element which never stores the children handed to it.

    Intended for parsing large files: a node with very many children checks
    each child and then drops it, so the child's memory can be released.

    tag (string)
      Tag of this element. Only the tag is forwarded to the base class, so
      instances are created without attributes.
    expected_tag_set (anything supporting the "in" operator)
      Tags of the children which may be dropped silently. A child which is
      itself a ChildDiscardingElement is always accepted, whatever its tag.
    """
    def __init__(self, tag, expected_tag_set):
        super(ChildDiscardingElement, self).__init__(tag)
        self.__expected_tag_set = expected_tag_set

    def __discard(self, child):
        """
        Check that child may be dropped; the child is never stored.

        Raises ValueError when child is neither a ChildDiscardingElement nor
        tagged with one of the expected tags.
        """
        if isinstance(child, ChildDiscardingElement):
            return
        if child.tag in self.__expected_tag_set:
            return
        raise ValueError('Unexpected discarded tag: %r (inside %r)' % (
            child.tag,
            self.tag,
        ))

    # Adding a single child is exactly the validation step above.
    append = __discard

    def extend(self, elements):
        """Validate each item of elements in order; none of them is kept."""
        for element in elements:
            self.__discard(element)

    def insert(self, index, element):
        """Validate element; index is ignored as nothing is stored."""
        self.__discard(element)
def parseStream(stream, child_discard_set, callback_dict, catchall=None):
    """
    Incrementally parse XML from stream, calling callbacks along the way.

    stream (opened read stream)
      Where XML data is read from. Expects a "read" method following the
      "file" API.
    child_discard_set (anything implementing '__contains__')
      Set of tags whose children should be discarded while parsing.
    callback_dict (dict)
      Dict of callables per event and tag type, to be called for these
      combinations when encountered. Expected structure is:
        {
          (event, tag): callable(element) -> None,
        }
      See xml.etree.ElementTree.iterparse for a list of possible events.
      An event of None is treated as 'end'; when both (None, tag) and
      ('end', tag) are declared, the ('end', tag) callback is the one called.
      Only events present in at least one key are requested from the parser.
    catchall (None, callable(element) -> None)
      Callback triggered for all actually triggered events but not declared in
      callback_dict.
      If None, a ValueError exception will be raised when an element whose tag
      is present in child_discard_set receives a non-child-discarding child for
      which neither "start" nor "end" callback exist.

    Returns None.
    """
    if catchall is None:
        catchall = lambda x: None
        # Without a catchall, only children somebody actually listens to may be
        # dropped silently by a child-discarding parent.
        callback_set = set(
            tag for event, tag in callback_dict
            if event in ('start', 'end', None)  # None is equivalent to 'end'
        )
    else:
        # A catchall sees every reported element, so any child may be dropped.
        callback_set = INFINITE_SET

    def elementFactory(tag, attrs):
        if tag in child_discard_set:
            # NOTE(review): attrs is not forwarded here, so child-discarding
            # elements carry no attributes - confirm this is intended.
            return ChildDiscardingElement(tag, callback_set)
        return RestrictedElement(tag, attrs)

    # iterparse only understands event names, so fold the None alias into
    # 'end' before asking for events. Iterating the dict itself yields its
    # keys without relying on the Python-2-only iterkeys().
    event_set = set(
        'end' if event is None else event
        for event, _ in callback_dict
    )
    parser = XMLParser(
        target=TreeBuilder(
            element_factory=elementFactory,
        ),
    )
    # NOTE(review): dispatch reads elem.tag, which namespace events
    # ('start-ns', 'end-ns') do not provide - presumably only 'start' and
    # 'end' are meant to be used as events; confirm.
    for event, elem in iterparse(stream, events=event_set, parser=parser):
        key = (event, elem.tag)
        if event == 'end' and key not in callback_dict:
            # Fall back to the None alias of 'end'.
            key = (None, elem.tag)
        callback_dict.get(key, catchall)(elem)
\ No newline at end of file
@@ -180,6 +180,7 @@ ModuleSecurityInfo('Products.ERP5Type.Constraint').declarePublic('PropertyTypeVa ...
ModuleSecurityInfo('Products.ERP5Type.collections').declarePublic('OrderedDict')
ModuleSecurityInfo('Products.ERP5Type.DiffUtils').declarePublic('DiffFile')
ModuleSecurityInfo('pprint').declarePublic('pformat', 'pprint')
ModuleSecurityInfo('Products.ERP5Type.XMLUtils').declarePublic('parseStream')
import zExceptions
ModuleSecurityInfo('zExceptions').declarePublic(*filter(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment