Commit a413040f authored by Martin v. Löwis

Merge with PyXML 1.28:

Wrap a lot of long lines.
Clean up a handler for expat.error.

If a lexical handler is set, make sure we call its startDTD() and
endDTD() methods.  If the lexical handler is unset (by setting it to
None), remove the handlers from the underlying pyexpat parser object.
Closes SF bug #485584.

In namespaces mode, make sure we set up the qnames dictionary
correctly for the AttributesNSImpl instance passed to the
start-element-handler.
Closes SF bug #563399.

Support skippedEntity. Fixes #665486.
parent 3d313727
""" """
SAX driver for the Pyexpat C module. This driver works with SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'. pyexpat.__version__ == '2.22'.
""" """
...@@ -82,7 +82,7 @@ class ExpatLocator(xmlreader.Locator): ...@@ -82,7 +82,7 @@ class ExpatLocator(xmlreader.Locator):
# --- ExpatParser # --- ExpatParser
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
"SAX driver for the Pyexpat C module." """SAX driver for the pyexpat C module."""
def __init__(self, namespaceHandling=0, bufsize=2**16-20): def __init__(self, namespaceHandling=0, bufsize=2**16-20):
xmlreader.IncrementalParser.__init__(self, bufsize) xmlreader.IncrementalParser.__init__(self, bufsize)
...@@ -110,7 +110,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -110,7 +110,7 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
if source.getSystemId() != None: if source.getSystemId() != None:
self._parser.SetBase(source.getSystemId()) self._parser.SetBase(source.getSystemId())
# Redefined setContentHandle to allow changing handlers during parsing # Redefined setContentHandler to allow changing handlers during parsing
def setContentHandler(self, handler): def setContentHandler(self, handler):
xmlreader.IncrementalParser.setContentHandler(self, handler) xmlreader.IncrementalParser.setContentHandler(self, handler)
...@@ -145,16 +145,19 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -145,16 +145,19 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._interning = None self._interning = None
elif name == feature_validation: elif name == feature_validation:
if state: if state:
raise SAXNotSupportedException("expat does not support validation") raise SAXNotSupportedException(
"expat does not support validation")
elif name == feature_external_pes: elif name == feature_external_pes:
if state: if state:
raise SAXNotSupportedException("expat does not read external parameter entities") raise SAXNotSupportedException(
"expat does not read external parameter entities")
elif name == feature_namespace_prefixes: elif name == feature_namespace_prefixes:
if state: if state:
raise SAXNotSupportedException("expat does not report namespace prefixes") raise SAXNotSupportedException(
"expat does not report namespace prefixes")
else: else:
raise SAXNotRecognizedException("Feature '%s' not recognized" % raise SAXNotRecognizedException(
name) "Feature '%s' not recognized" % name)
def getProperty(self, name): def getProperty(self, name):
if name == handler.property_lexical_handler: if name == handler.property_lexical_handler:
...@@ -166,9 +169,12 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -166,9 +169,12 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
if hasattr(self._parser, "GetInputContext"): if hasattr(self._parser, "GetInputContext"):
return self._parser.GetInputContext() return self._parser.GetInputContext()
else: else:
raise SAXNotRecognizedException("This version of expat does not support getting the XML string") raise SAXNotRecognizedException(
"This version of expat does not support getting"
" the XML string")
else: else:
raise SAXNotSupportedException("XML string cannot be returned when not parsing") raise SAXNotSupportedException(
"XML string cannot be returned when not parsing")
raise SAXNotRecognizedException("Property '%s' not recognized" % name) raise SAXNotRecognizedException("Property '%s' not recognized" % name)
def setProperty(self, name, value): def setProperty(self, name, value):
...@@ -199,9 +205,8 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -199,9 +205,8 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
# document. When feeding chunks, they are not normally final - # document. When feeding chunks, they are not normally final -
# except when invoked from close. # except when invoked from close.
self._parser.Parse(data, isFinal) self._parser.Parse(data, isFinal)
except expat.error: except expat.error, e:
error_code = self._parser.ErrorCode exc = SAXParseException(expat.ErrorString(e.code), e, self)
exc = SAXParseException(expat.ErrorString(error_code), None, self)
# FIXME: when to invoke error()? # FIXME: when to invoke error()?
self._err_handler.fatalError(exc) self._err_handler.fatalError(exc)
...@@ -221,13 +226,26 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -221,13 +226,26 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._parser.CharacterDataHandler = self._cont_handler.characters self._parser.CharacterDataHandler = self._cont_handler.characters
def _reset_lex_handler_prop(self): def _reset_lex_handler_prop(self):
self._parser.CommentHandler = self._lex_handler_prop.comment lex = self._lex_handler_prop
self._parser.StartCdataSectionHandler = self._lex_handler_prop.startCDATA parser = self._parser
self._parser.EndCdataSectionHandler = self._lex_handler_prop.endCDATA if lex is None:
parser.CommentHandler = None
parser.StartCdataSectionHandler = None
parser.EndCdataSectionHandler = None
parser.StartDoctypeDeclHandler = None
parser.EndDoctypeDeclHandler = None
else:
parser.CommentHandler = lex.comment
parser.StartCdataSectionHandler = lex.startCDATA
parser.EndCdataSectionHandler = lex.endCDATA
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EndDoctypeDeclHandler = lex.endDTD
def reset(self): def reset(self):
if self._namespaces: if self._namespaces:
self._parser = expat.ParserCreate(None, " ", intern = self._interning) self._parser = expat.ParserCreate(None, " ",
intern=self._interning)
self._parser.namespace_prefixes = 1
self._parser.StartElementHandler = self.start_element_ns self._parser.StartElementHandler = self.start_element_ns
self._parser.EndElementHandler = self.end_element_ns self._parser.EndElementHandler = self.end_element_ns
else: else:
...@@ -248,7 +266,13 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -248,7 +266,13 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
# self._parser.DefaultHandlerExpand = # self._parser.DefaultHandlerExpand =
# self._parser.NotStandaloneHandler = # self._parser.NotStandaloneHandler =
self._parser.ExternalEntityRefHandler = self.external_entity_ref self._parser.ExternalEntityRefHandler = self.external_entity_ref
self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) try:
self._parser.SkippedEntityHandler = self.skipped_entity_handler
except AttributeError:
# This pyexpat does not support SkippedEntity
pass
self._parser.SetParamEntityParsing(
expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
self._parsing = 0 self._parsing = 0
self._entity_stack = [] self._entity_stack = []
...@@ -281,27 +305,43 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -281,27 +305,43 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def start_element_ns(self, name, attrs): def start_element_ns(self, name, attrs):
pair = name.split() pair = name.split()
if len(pair) == 1: if len(pair) == 1:
# no namespace
pair = (None, name) pair = (None, name)
elif len(pair) == 3:
pair = pair[0], pair[1]
else: else:
# default namespace
pair = tuple(pair) pair = tuple(pair)
newattrs = {} newattrs = {}
qnames = {}
for (aname, value) in attrs.items(): for (aname, value) in attrs.items():
apair = aname.split() parts = aname.split()
if len(apair) == 1: length = len(parts)
if length == 1:
# no namespace
qname = aname
apair = (None, aname) apair = (None, aname)
elif length == 3:
qname = "%s:%s" % (parts[2], parts[1])
apair = parts[0], parts[1]
else: else:
apair = tuple(apair) # default namespace
qname = parts[1]
apair = tuple(parts)
newattrs[apair] = value newattrs[apair] = value
qnames[apair] = qname
self._cont_handler.startElementNS(pair, None, self._cont_handler.startElementNS(pair, None,
AttributesNSImpl(newattrs, {})) AttributesNSImpl(newattrs, qnames))
def end_element_ns(self, name): def end_element_ns(self, name):
pair = name.split() pair = name.split()
if len(pair) == 1: if len(pair) == 1:
pair = (None, name) pair = (None, name)
elif len(pair) == 3:
pair = pair[0], pair[1]
else: else:
pair = tuple(pair) pair = tuple(pair)
...@@ -321,6 +361,9 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -321,6 +361,9 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def end_namespace_decl(self, prefix): def end_namespace_decl(self, prefix):
self._cont_handler.endPrefixMapping(prefix) self._cont_handler.endPrefixMapping(prefix)
def start_doctype_decl(self, name, pubid, sysid, has_internal_subset):
self._lex_handler_prop.startDTD(name, pubid, sysid)
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
...@@ -349,6 +392,12 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): ...@@ -349,6 +392,12 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
del self._entity_stack[-1] del self._entity_stack[-1]
return 1 return 1
def skipped_entity_handler(self, name, is_pe):
if is_pe:
# The SAX spec requires to report skipped PEs with a '%'
name = '%'+name
self._cont_handler.skippedEntity(name)
# --- # ---
def create_parser(*args, **kwargs): def create_parser(*args, **kwargs):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment