Commit c16adce2 authored by Fred Drake's avatar Fred Drake

Adjust PullDOM to use a DOMImplementation instance to create new Document

objects; uses minidom if one is not provided to the constructor.

parse():  Pick up the default_bufsize default value dynamically so that
          the value in the module may be (meaningfully) changed at runtime.

This (partially) closes patch #102477.
parent adf5410d
import minidom
import xml.sax,xml.sax.handler
import xml.sax
import xml.sax.handler
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
......@@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler):
def __init__(self):
_locator = None
document = None
def __init__(self, documentFactory=None):
self.documentFactory = documentFactory
self.firstEvent = [None, None]
self.lastEvent = self.firstEvent
self._ns_contexts = [{}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1]
def setDocumentLocator(self, locator): pass
def setDocumentLocator(self, locator):
self._locator = locator
def startPrefixMapping(self, prefix, uri):
self._ns_contexts.append(self._current_context.copy())
self._current_context[uri] = prefix
self._current_context[uri] = prefix or ''
def endPrefixMapping(self, prefix):
del self._ns_contexts[-1]
self._current_context = self._ns_contexts.pop()
def startElementNS(self, name, tagName , attrs):
uri,localname = name
uri, localname = name
if uri:
# When using namespaces, the reader may or may not
# provide us with the original name. If not, create
......@@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value
node.setAttributeNode(attr)
parent = self.curNode
node.parentNode = parent
node.parentNode = self.curNode
self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None]
......@@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent[1] = [(END_ELEMENT, node), None]
self.lastEvent = self.lastEvent[1]
#self.events.append((END_ELEMENT, node))
self.curNode = node.parentNode
self.curNode = self.curNode.parentNode
def startElement(self, name, attrs):
node = self.document.createElement(name)
......@@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value
node.setAttributeNode(attr)
parent = self.curNode
node.parentNode = parent
node.parentNode = self.curNode
self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None]
......@@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler):
#self.events.append((PROCESSING_INSTRUCTION, node))
def ignorableWhitespace(self, chars):
node = self.document.createTextNode(chars[start:start + length])
node = self.document.createTextNode(chars)
parent = self.curNode
node.parentNode = parent
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
......@@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent = self.lastEvent[1]
def startDocument(self):
node = self.curNode = self.document = minidom.Document()
node.parentNode = None
publicId = systemId = None
if self._locator:
publicId = self._locator.getPublicId()
systemId = self._locator.getSystemId()
if self.documentFactory is None:
import xml.dom.minidom
self.documentFactory = xml.dom.minidom.Document.implementation
node = self.documentFactory.createDocument(None, publicId, systemId)
self.curNode = self.document = node
self.lastEvent[1] = [(START_DOCUMENT, node), None]
self.lastEvent = self.lastEvent[1]
#self.events.append((START_DOCUMENT, node))
def endDocument(self):
assert not self.curNode.parentNode
for node in self.curNode.childNodes:
if node.nodeType == node.ELEMENT_NODE:
self.document.documentElement = node
#if not self.document.documentElement:
# raise Error, "No document element"
assert self.curNode.parentNode is None, \
"not all elements have been properly closed"
assert self.curNode.documentElement is not None, \
"document does not contain a root element"
node = self.curNode.documentElement
self.lastEvent[1] = [(END_DOCUMENT, node), None]
#self.events.append((END_DOCUMENT, self.curNode))
......@@ -156,7 +164,7 @@ class DOMEventStream:
def reset(self):
self.pulldom = PullDOM()
# This content handler relies on namespace support
self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
self.parser.setContentHandler(self.pulldom)
def __getitem__(self, pos):
......@@ -179,7 +187,7 @@ class DOMEventStream:
if not self.pulldom.firstEvent[1]:
self.pulldom.lastEvent = self.pulldom.firstEvent
while not self.pulldom.firstEvent[1]:
buf=self.stream.read(self.bufsize)
buf = self.stream.read(self.bufsize)
if not buf:
#FIXME: why doesn't Expat close work?
#self.parser.close()
......@@ -214,10 +222,13 @@ class SAX2DOM(PullDOM):
node = self.lastEvent[0][1]
node.parentNode.appendChild(node)
default_bufsize = (2 ** 14) - 20
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
if type(stream_or_string) is type(""):
def parse(stream_or_string, parser=None, bufsize=None):
if bufsize is None:
bufsize = default_bufsize
if type(stream_or_string) in [type(""), type(u"")]:
stream = open(stream_or_string)
else:
stream = stream_or_string
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment