Commit e292a245 authored by Lars Gustäbel's avatar Lars Gustäbel

Added EntityResolver and DTDHandler (patch 101631) with test cases.

parent bc1c1c98
......@@ -6,6 +6,8 @@ Passed test_escape_basic
Passed test_escape_extra
Passed test_expat_attrs_empty
Passed test_expat_attrs_wattr
Passed test_expat_dtdhandler
Passed test_expat_entityresolver
Passed test_expat_inpsource_filename
Passed test_expat_inpsource_stream
Passed test_expat_inpsource_sysid
......@@ -20,4 +22,4 @@ Passed test_xmlgen_content_escape
Passed test_xmlgen_ignorable
Passed test_xmlgen_ns
Passed test_xmlgen_pi
21 tests, 0 failures
23 tests, 0 failures
......@@ -156,25 +156,45 @@ class TestDTDHandler:
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
# def test_expat_dtdhandler():
# parser = create_parser()
# handler = TestDTDHandler()
# parser.setDTDHandler(handler)
# parser.feed('<!DOCTYPE doc [\n')
# parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
# parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
# parser.feed(']>\n')
# parser.feed('<doc></doc>')
# parser.close()
def test_expat_dtdhandler():
parser = create_parser()
handler = TestDTDHandler()
parser.setDTDHandler(handler)
parser.feed('<!DOCTYPE doc [\n')
parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
parser.feed(']>\n')
parser.feed('<doc></doc>')
parser.close()
# return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \
# handler._entities == [("img", None, "expat.gif", "GIF")]
return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \
handler._entities == [("img", None, "expat.gif", "GIF")]
# ===== EntityResolver support
# can't test this until InputSource is in place
class TestEntityResolver:
def resolveEntity(self, publicId, systemId):
inpsrc = InputSource()
inpsrc.setByteStream(StringIO("<entity/>"))
return inpsrc
def test_expat_entityresolver():
return 1 # disabling this until pyexpat.c has been fixed
parser = create_parser()
parser.setEntityResolver(TestEntityResolver())
result = StringIO()
parser.setContentHandler(XMLGenerator(result))
parser.feed('<!DOCTYPE doc [\n')
parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
parser.feed(']>\n')
parser.feed('<doc>&test;</doc>')
parser.close()
return result.getvalue() == start + "<doc><entity></entity></doc>"
# ===== Attributes support
class AttrGatherer(ContentHandler):
......@@ -440,5 +460,3 @@ for (name, value) in items:
print "%d tests, %d failures" % (tests, fails)
if fails != 0:
raise TestFailed, "%d of %d tests failed" % (fails, tests)
make_test_output()
......@@ -3,17 +3,6 @@ SAX driver for the Pyexpat C module. This driver works with
pyexpat.__version__ == '1.5'.
"""
# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper inputsource handling
# - properties and features
# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to python code lose position info
version = "0.20"
from xml.sax._exceptions import *
......@@ -30,10 +19,11 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def __init__(self, namespaceHandling=0, bufsize=2**16-20):
xmlreader.IncrementalParser.__init__(self, bufsize)
self._source = None
self._source = xmlreader.InputSource()
self._parser = None
self._namespaces = namespaceHandling
self._parsing = 0
self._entity_stack = []
# XMLReader methods
......@@ -186,11 +176,23 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._dtd_handler.notationDecl(name, pubid, sysid)
def external_entity_ref(self, context, base, sysid, pubid):
raise NotImplementedError()
source = self._ent_handler.resolveEntity(pubid, sysid)
source = saxutils.prepare_input_source(source)
# FIXME: create new parser, stack self._source and self._parser
# FIXME: reuse code from self.parse(...)
source = saxutils.prepare_input_source(source,
self._source.getSystemId() or
"")
self._entity_stack.append((self._parser, self._source))
self._parser = self._parser.ExternalEntityParserCreate(context)
self._source = source
try:
xmlreader.IncrementalParser.parse(self, source)
self.close()
except:
return 0 # FIXME: save error info here?
(self._parser, self._source) = self._entity_stack[-1]
del self._entity_stack[-1]
return 1
# ---
......
......@@ -17,7 +17,7 @@ version = '2.0beta'
#
#============================================================================
# ===== ErrorHandler =====
# ===== ERRORHANDLER =====
class ErrorHandler:
"""Basic interface for SAX error handlers. If you create an object
......@@ -40,6 +40,7 @@ class ErrorHandler:
"Handle a warning."
print exception
# ===== CONTENTHANDLER =====
class ContentHandler:
......@@ -199,7 +200,39 @@ class ContentHandler:
http://xml.org/sax/features/external-general-entities and the
http://xml.org/sax/features/external-parameter-entities
properties."""
# ===== DTDHandler =====
class DTDHandler:
"""Handle DTD events.
This interface specifies only those DTD events required for basic
parsing (unparsed entities and attributes)."""
def notationDecl(self, name, publicId, systemId):
"Handle a notation declaration event."
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
"Handle an unparsed entity declaration event."
# ===== ENTITYRESOLVER =====
class EntityResolver:
"""Basic interface for resolving entities. If you create an object
implementing this interface, then register the object with your
Parser, the parser will call the method in your object to
resolve all external entities. Note that DefaultHandler implements
this interface with the default behaviour."""
def resolveEntity(self, publicId, systemId):
"""Resolve the system identifier of an entity and return either
the system identifier to read from as a string, or an InputSource
to read from."""
return systemId
#============================================================================
#
# CORE FEATURES
......
......@@ -9,8 +9,8 @@ class XMLReader:
def __init__(self):
self._cont_handler = handler.ContentHandler()
#self._dtd_handler = handler.DTDHandler()
#self._ent_handler = handler.EntityResolver()
self._dtd_handler = handler.DTDHandler()
self._ent_handler = handler.EntityResolver()
self._err_handler = handler.ErrorHandler()
def parse(self, source):
......@@ -109,8 +109,6 @@ class IncrementalParser(XMLReader):
while buffer != "":
self.feed(buffer)
buffer = file.read(self._bufsize)
self.reset()
def feed(self, data):
"""This method gives the raw XML data in the data parameter to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment