From c16adce273d20e462d705a7dcdbf17948cc8ba91 Mon Sep 17 00:00:00 2001
From: Fred Drake <fdrake@acm.org>
Date: Thu, 14 Dec 2000 18:00:18 +0000
Subject: [PATCH] Adjust PullDOM to use a DOMImplementation instance to create
 new Document objects; uses minidom if one is not provided to the constructor.

parse():  Pick up the default_bufsize default value dynamically so that
          the value in the module may be (meaningfully) changed at runtime.

This (partially) closes patch #102477.
---
 Lib/xml/dom/pulldom.py | 63 +++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py
index 063f7e88022..e674385307e 100644
--- a/Lib/xml/dom/pulldom.py
+++ b/Lib/xml/dom/pulldom.py
@@ -1,5 +1,5 @@
-import minidom
-import xml.sax,xml.sax.handler
+import xml.sax
+import xml.sax.handler
 
 START_ELEMENT = "START_ELEMENT"
 END_ELEMENT = "END_ELEMENT"
@@ -11,23 +11,28 @@ IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
 CHARACTERS = "CHARACTERS"
 
 class PullDOM(xml.sax.ContentHandler):
-    def __init__(self):
+    _locator = None
+    document = None
+
+    def __init__(self, documentFactory=None):
+        self.documentFactory = documentFactory
         self.firstEvent = [None, None]
         self.lastEvent = self.firstEvent
         self._ns_contexts = [{}] # contains uri -> prefix dicts
         self._current_context = self._ns_contexts[-1]
 
-    def setDocumentLocator(self, locator): pass
+    def setDocumentLocator(self, locator):
+        self._locator = locator
 
     def startPrefixMapping(self, prefix, uri):
         self._ns_contexts.append(self._current_context.copy())
-        self._current_context[uri] = prefix
+        self._current_context[uri] = prefix or ''
 
     def endPrefixMapping(self, prefix):
-        del self._ns_contexts[-1]
+        self._current_context = self._ns_contexts.pop()
 
     def startElementNS(self, name, tagName , attrs):
-        uri,localname = name
+        uri, localname = name
         if uri:
             # When using namespaces, the reader may or may not
             # provide us with the original name. If not, create
@@ -50,8 +55,7 @@ class PullDOM(xml.sax.ContentHandler):
             attr.value = value
             node.setAttributeNode(attr)
 
-        parent = self.curNode
-        node.parentNode = parent
+        node.parentNode = self.curNode
         self.curNode = node
 
         self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -63,7 +67,7 @@ class PullDOM(xml.sax.ContentHandler):
         self.lastEvent[1] = [(END_ELEMENT, node), None]
         self.lastEvent = self.lastEvent[1]
         #self.events.append((END_ELEMENT, node))
-        self.curNode = node.parentNode
+        self.curNode = self.curNode.parentNode
 
     def startElement(self, name, attrs):
         node = self.document.createElement(name)
@@ -73,8 +77,7 @@ class PullDOM(xml.sax.ContentHandler):
             attr.value = value
             node.setAttributeNode(attr)
 
-        parent = self.curNode
-        node.parentNode = parent
+        node.parentNode = self.curNode
         self.curNode = node
 
         self.lastEvent[1] = [(START_ELEMENT, node), None]
@@ -106,7 +109,7 @@ class PullDOM(xml.sax.ContentHandler):
         #self.events.append((PROCESSING_INSTRUCTION, node))
 
     def ignorableWhitespace(self, chars):
-        node = self.document.createTextNode(chars[start:start + length])
+        node = self.document.createTextNode(chars)
         parent = self.curNode
         node.parentNode = parent
         self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
@@ -121,20 +124,25 @@ class PullDOM(xml.sax.ContentHandler):
         self.lastEvent = self.lastEvent[1]
 
     def startDocument(self):
-        node = self.curNode = self.document = minidom.Document()
-        node.parentNode = None
+        publicId = systemId = None
+        if self._locator:
+            publicId = self._locator.getPublicId()
+            systemId = self._locator.getSystemId()
+        if self.documentFactory is None:
+            import xml.dom.minidom
+            self.documentFactory = xml.dom.minidom.Document.implementation
+        node = self.documentFactory.createDocument(None, publicId, systemId)
+        self.curNode = self.document = node
         self.lastEvent[1] = [(START_DOCUMENT, node), None]
         self.lastEvent = self.lastEvent[1]
         #self.events.append((START_DOCUMENT, node))
 
     def endDocument(self):
-        assert not self.curNode.parentNode
-        for node in self.curNode.childNodes:
-            if node.nodeType == node.ELEMENT_NODE:
-                self.document.documentElement = node
-        #if not self.document.documentElement:
-        #    raise Error, "No document element"
-
+        assert self.curNode.parentNode is None, \
+               "not all elements have been properly closed"
+        assert self.curNode.documentElement is not None, \
+               "document does not contain a root element"
+        node = self.curNode.documentElement
         self.lastEvent[1] = [(END_DOCUMENT, node), None]
         #self.events.append((END_DOCUMENT, self.curNode))
 
@@ -156,7 +164,7 @@ class DOMEventStream:
     def reset(self):
         self.pulldom = PullDOM()
         # This content handler relies on namespace support
-        self.parser.setFeature(xml.sax.handler.feature_namespaces,1)
+        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
         self.parser.setContentHandler(self.pulldom)
 
     def __getitem__(self, pos):
@@ -179,7 +187,7 @@ class DOMEventStream:
         if not self.pulldom.firstEvent[1]:
             self.pulldom.lastEvent = self.pulldom.firstEvent
         while not self.pulldom.firstEvent[1]:
-            buf=self.stream.read(self.bufsize)
+            buf = self.stream.read(self.bufsize)
             if not buf:
                 #FIXME: why doesn't Expat close work?
                 #self.parser.close()
@@ -214,10 +222,13 @@ class SAX2DOM(PullDOM):
         node = self.lastEvent[0][1]
         node.parentNode.appendChild(node)
 
+
 default_bufsize = (2 ** 14) - 20
 
-def parse(stream_or_string, parser=None, bufsize=default_bufsize):
-    if type(stream_or_string) is type(""):
+def parse(stream_or_string, parser=None, bufsize=None):
+    if bufsize is None:
+        bufsize = default_bufsize
+    if type(stream_or_string) in [type(""), type(u"")]:
         stream = open(stream_or_string)
     else:
         stream = stream_or_string
-- 
2.30.9