Commit c16adce2 authored by Fred Drake

Adjust PullDOM to use a DOMImplementation instance to create new Document
objects; uses minidom if one is not provided to the constructor.

parse():  Pick up the default_bufsize default value dynamically so that
          the value in the module may be (meaningfully) changed at runtime.

This (partially) closes patch #102477.
parent adf5410d
import minidom import xml.sax
import xml.sax,xml.sax.handler import xml.sax.handler
START_ELEMENT = "START_ELEMENT" START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT" END_ELEMENT = "END_ELEMENT"
...@@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE" ...@@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS" CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler): class PullDOM(xml.sax.ContentHandler):
def __init__(self): _locator = None
document = None
def __init__(self, documentFactory=None):
self.documentFactory = documentFactory
self.firstEvent = [None, None] self.firstEvent = [None, None]
self.lastEvent = self.firstEvent self.lastEvent = self.firstEvent
self._ns_contexts = [{}] # contains uri -> prefix dicts self._ns_contexts = [{}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1] self._current_context = self._ns_contexts[-1]
def setDocumentLocator(self, locator): pass def setDocumentLocator(self, locator):
self._locator = locator
def startPrefixMapping(self, prefix, uri): def startPrefixMapping(self, prefix, uri):
self._ns_contexts.append(self._current_context.copy()) self._ns_contexts.append(self._current_context.copy())
self._current_context[uri] = prefix self._current_context[uri] = prefix or ''
def endPrefixMapping(self, prefix): def endPrefixMapping(self, prefix):
del self._ns_contexts[-1] self._current_context = self._ns_contexts.pop()
def startElementNS(self, name, tagName , attrs): def startElementNS(self, name, tagName , attrs):
uri,localname = name uri, localname = name
if uri: if uri:
# When using namespaces, the reader may or may not # When using namespaces, the reader may or may not
# provide us with the original name. If not, create # provide us with the original name. If not, create
...@@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler): ...@@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value attr.value = value
node.setAttributeNode(attr) node.setAttributeNode(attr)
parent = self.curNode node.parentNode = self.curNode
node.parentNode = parent
self.curNode = node self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None] self.lastEvent[1] = [(START_ELEMENT, node), None]
...@@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler): ...@@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent[1] = [(END_ELEMENT, node), None] self.lastEvent[1] = [(END_ELEMENT, node), None]
self.lastEvent = self.lastEvent[1] self.lastEvent = self.lastEvent[1]
#self.events.append((END_ELEMENT, node)) #self.events.append((END_ELEMENT, node))
self.curNode = node.parentNode self.curNode = self.curNode.parentNode
def startElement(self, name, attrs): def startElement(self, name, attrs):
node = self.document.createElement(name) node = self.document.createElement(name)
...@@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler): ...@@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler):
attr.value = value attr.value = value
node.setAttributeNode(attr) node.setAttributeNode(attr)
parent = self.curNode node.parentNode = self.curNode
node.parentNode = parent
self.curNode = node self.curNode = node
self.lastEvent[1] = [(START_ELEMENT, node), None] self.lastEvent[1] = [(START_ELEMENT, node), None]
...@@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler): ...@@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler):
#self.events.append((PROCESSING_INSTRUCTION, node)) #self.events.append((PROCESSING_INSTRUCTION, node))
def ignorableWhitespace(self, chars): def ignorableWhitespace(self, chars):
node = self.document.createTextNode(chars[start:start + length]) node = self.document.createTextNode(chars)
parent = self.curNode parent = self.curNode
node.parentNode = parent node.parentNode = parent
self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None] self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
...@@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler): ...@@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler):
self.lastEvent = self.lastEvent[1] self.lastEvent = self.lastEvent[1]
def startDocument(self): def startDocument(self):
node = self.curNode = self.document = minidom.Document() publicId = systemId = None
node.parentNode = None if self._locator:
publicId = self._locator.getPublicId()
systemId = self._locator.getSystemId()
if self.documentFactory is None:
import xml.dom.minidom
self.documentFactory = xml.dom.minidom.Document.implementation
node = self.documentFactory.createDocument(None, publicId, systemId)
self.curNode = self.document = node
self.lastEvent[1] = [(START_DOCUMENT, node), None] self.lastEvent[1] = [(START_DOCUMENT, node), None]
self.lastEvent = self.lastEvent[1] self.lastEvent = self.lastEvent[1]
#self.events.append((START_DOCUMENT, node)) #self.events.append((START_DOCUMENT, node))
def endDocument(self): def endDocument(self):
assert not self.curNode.parentNode assert self.curNode.parentNode is None, \
for node in self.curNode.childNodes: "not all elements have been properly closed"
if node.nodeType == node.ELEMENT_NODE: assert self.curNode.documentElement is not None, \
self.document.documentElement = node "document does not contain a root element"
#if not self.document.documentElement: node = self.curNode.documentElement
# raise Error, "No document element"
self.lastEvent[1] = [(END_DOCUMENT, node), None] self.lastEvent[1] = [(END_DOCUMENT, node), None]
#self.events.append((END_DOCUMENT, self.curNode)) #self.events.append((END_DOCUMENT, self.curNode))
...@@ -156,7 +164,7 @@ class DOMEventStream: ...@@ -156,7 +164,7 @@ class DOMEventStream:
def reset(self): def reset(self):
self.pulldom = PullDOM() self.pulldom = PullDOM()
# This content handler relies on namespace support # This content handler relies on namespace support
self.parser.setFeature(xml.sax.handler.feature_namespaces,1) self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
self.parser.setContentHandler(self.pulldom) self.parser.setContentHandler(self.pulldom)
def __getitem__(self, pos): def __getitem__(self, pos):
...@@ -179,7 +187,7 @@ class DOMEventStream: ...@@ -179,7 +187,7 @@ class DOMEventStream:
if not self.pulldom.firstEvent[1]: if not self.pulldom.firstEvent[1]:
self.pulldom.lastEvent = self.pulldom.firstEvent self.pulldom.lastEvent = self.pulldom.firstEvent
while not self.pulldom.firstEvent[1]: while not self.pulldom.firstEvent[1]:
buf=self.stream.read(self.bufsize) buf = self.stream.read(self.bufsize)
if not buf: if not buf:
#FIXME: why doesn't Expat close work? #FIXME: why doesn't Expat close work?
#self.parser.close() #self.parser.close()
...@@ -214,10 +222,13 @@ class SAX2DOM(PullDOM): ...@@ -214,10 +222,13 @@ class SAX2DOM(PullDOM):
node = self.lastEvent[0][1] node = self.lastEvent[0][1]
node.parentNode.appendChild(node) node.parentNode.appendChild(node)
default_bufsize = (2 ** 14) - 20 default_bufsize = (2 ** 14) - 20
def parse(stream_or_string, parser=None, bufsize=default_bufsize): def parse(stream_or_string, parser=None, bufsize=None):
if type(stream_or_string) is type(""): if bufsize is None:
bufsize = default_bufsize
if type(stream_or_string) in [type(""), type(u"")]:
stream = open(stream_or_string) stream = open(stream_or_string)
else: else:
stream = stream_or_string stream = stream_or_string
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment