Commit ef180a0a authored by Paul Prescod

Reference cycle fixes

parent 2e65a7ea
...@@ -8,6 +8,6 @@ dom -- The W3C Document Object Model. This supports DOM Level 1 + ...@@ -8,6 +8,6 @@ dom -- The W3C Document Object Model. This supports DOM Level 1 +
parser -- Python wrappers for XML parsers (currently only supports Expat). parser -- Python wrappers for XML parsers (currently only supports Expat).
sax -- The Simple API for XML, developed by XML-Dev, led by David sax -- The Simple API for XML, developed by XML-Dev, led by David
Megginson. This supports the SAX 2 API. Megginson and ported to Python by Lars Marius Garsholm. This
supports the SAX 2 API.
""" """
...@@ -29,11 +29,19 @@ class Node: ...@@ -29,11 +29,19 @@ class Node:
DOCUMENT_FRAGMENT_NODE = 11 DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12 NOTATION_NODE = 12
allnodes=[] allnodes={}
_debug=0
_makeParentNodes=1
debug=None
def __init__( self ): def __init__( self ):
self.childNodes=[] self.childNodes=[]
Node.allnodes.append( repr( id( self ))+repr( self.__class__ )) if Node._debug:
index=repr( id( self ))+repr( self.__class__ )
Node.allnodes[index]=repr( self.__dict__ )
if Node.debug==None:
Node.debug=open( "debug4.out", "w" )
Node.debug.write( "create %s\n"%index )
def __getattr__( self, key ): def __getattr__( self, key ):
if key[0:2]=="__": raise AttributeError if key[0:2]=="__": raise AttributeError
...@@ -72,12 +80,39 @@ class Node: ...@@ -72,12 +80,39 @@ class Node:
if self.childNodes: return 1 if self.childNodes: return 1
else: return 0 else: return 0
def _get_firstChild( self ):
return self.childNodes[0]
def _get_lastChild( self ):
return self.childNodes[-1]
def insertBefore( self, newChild, refChild): def insertBefore( self, newChild, refChild):
index=self.childNodes.index( refChild ) index=self.childNodes.index( refChild )
self.childNodes.insert( index, newChild ) self.childNodes.insert( index, newChild )
if self._makeParentNodes:
newChild.parentNode=self
def appendChild( self, node ): def appendChild( self, node ):
self.childNodes.append( node ) self.childNodes.append( node )
return node
def replaceChild( self, newChild, oldChild ):
index=self.childNodes.index( oldChild )
self.childNodes[index]=oldChild
def removeChild( self, oldChild ):
index=self.childNodes.index( oldChild )
del self.childNodes[index]
def cloneNode( self, deep ):
import new
clone=new.instance( self.__class__, self.__dict__ )
clone.attributes=self.attributes.copy()
if not deep:
clone.childNodes=[]
else:
clone.childNodes=map( lambda x: x.cloneNode, self.childNodes )
return clone
def unlink( self ): def unlink( self ):
self.parentNode=None self.parentNode=None
...@@ -86,10 +121,13 @@ class Node: ...@@ -86,10 +121,13 @@ class Node:
del self.childNodes[-1] # probably not most efficient! del self.childNodes[-1] # probably not most efficient!
self.childNodes=None self.childNodes=None
if self.attributes: if self.attributes:
for attr in self.attributes.values(): for attr in self._attrs.values():
attr.unlink() self.removeAttributeNode( attr )
self.attributes=None assert not len( self._attrs )
index=Node.allnodes.index( repr( id( self ))+repr( self.__class__ )) assert not len( self._attrsNS )
if Node._debug:
index=repr( id( self ))+repr( self.__class__ )
self.debug.write( "Deleting: %s\n" % index )
del Node.allnodes[index] del Node.allnodes[index]
def _write_data( writer, data): def _write_data( writer, data):
...@@ -100,11 +138,6 @@ def _write_data( writer, data): ...@@ -100,11 +138,6 @@ def _write_data( writer, data):
data=string.replace(data,">","&gt;") data=string.replace(data,">","&gt;")
writer.write(data) writer.write(data)
def _closeElement( element ):
del element.parentNode
for node in element.elements:
_closeElement( node )
def _getElementsByTagNameHelper( parent, name, rc ): def _getElementsByTagNameHelper( parent, name, rc ):
for node in parent.childNodes: for node in parent.childNodes:
if node.nodeType==Node.ELEMENT_NODE and\ if node.nodeType==Node.ELEMENT_NODE and\
...@@ -123,17 +156,16 @@ def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ): ...@@ -123,17 +156,16 @@ def _getElementsByTagNameNSHelper( parent, nsURI, localName, rc ):
class Attr(Node): class Attr(Node):
nodeType=Node.ATTRIBUTE_NODE nodeType=Node.ATTRIBUTE_NODE
def __init__( self, qName, namespaceURI="", prefix="", def __init__( self, qName, namespaceURI="", localName=None,
localName=None ): prefix=None ):
Node.__init__( self )
assert qName
# skip setattr for performance # skip setattr for performance
self.__dict__["nodeName"] = self.__dict__["name"] = qName
self.__dict__["localName"]=localName or qName self.__dict__["localName"]=localName or qName
self.__dict__["prefix"]=prefix self.__dict__["nodeName"] = self.__dict__["name"] = qName
self.__dict__["namespaceURI"]=namespaceURI self.__dict__["namespaceURI"]=namespaceURI
# nodeValue and value are set elsewhere self.__dict__["prefix"]=prefix
self.attributes=None self.attributes=None
Node.__init__( self )
# nodeValue and value are set elsewhere
def __setattr__( self, name, value ): def __setattr__( self, name, value ):
if name in ("value", "nodeValue" ): if name in ("value", "nodeValue" ):
...@@ -142,12 +174,13 @@ class Attr(Node): ...@@ -142,12 +174,13 @@ class Attr(Node):
self.__dict__[name]=value self.__dict__[name]=value
class AttributeList: class AttributeList:
# the attribute list is a transient interface to the underlying dictionaries """the attribute list is a transient interface to the underlying
# mutations here will change the underlying element's dictionary dictionaries. mutations here will change the underlying element's
dictionary"""
def __init__( self, attrs, attrsNS ): def __init__( self, attrs, attrsNS ):
self.__attrs=attrs self._attrs=attrs
self.__attrsNS=attrs self._attrsNS=attrsNS
self.length=len( self.__attrs.keys() ) self.length=len( self._attrs.keys() )
def item( self, index ): def item( self, index ):
try: try:
...@@ -157,40 +190,46 @@ class AttributeList: ...@@ -157,40 +190,46 @@ class AttributeList:
def items( self ): def items( self ):
return map( lambda node: (node.tagName, node.value), return map( lambda node: (node.tagName, node.value),
self.__attrs.values() ) self._attrs.values() )
def itemsNS( self ): def itemsNS( self ):
return map( lambda node: ((node.URI, node.localName), node.value), return map( lambda node: ((node.URI, node.localName), node.value),
self.__attrs.values() ) self._attrs.values() )
def keys( self ): def keys( self ):
return self.__attrs.keys() return self._attrs.keys()
def keysNS( self ): def keysNS( self ):
return self.__attrsNS.keys() return self._attrsNS.keys()
def values( self ): def values( self ):
return self.__attrs.values() return self._attrs.values()
def __len__( self ): def __len__( self ):
return self.length return self.length
def __cmp__( self, other ): def __cmp__( self, other ):
if self.__attrs is other.__attrs: if self._attrs is getattr( other, "_attrs", None ):
return 0 return 0
else: else:
return cmp( id( self ), id( other ) ) return cmp( id( self ), id( other ) )
#FIXME: is it appropriate to return .value? #FIXME: is it appropriate to return .value?
def __getitem__( self, attname_or_tuple ): def __getitem__( self, attname_or_tuple ):
if type( attname_or_tuple ) == type( (1,2) ): if type( attname_or_tuple ) == type( () ):
return self.__attrsNS[attname_or_tuple].value return self._attrsNS[attname_or_tuple]
else: else:
return self.__attrs[attname_or_tuple].value return self._attrs[attname_or_tuple]
def __setitem__( self, attname ): def __setitem__( self, attname ):
raise TypeError, "object does not support item assignment" raise TypeError, "object does not support item assignment"
def __delitem__( self, attname_or_tuple ):
node=self[attname_or_tuple]
node.unlink()
del self._attrs[node.name]
del self._attrsNS[(node.namespaceURI, node.localName)]
class Element( Node ): class Element( Node ):
nodeType=Node.ELEMENT_NODE nodeType=Node.ELEMENT_NODE
def __init__( self, tagName, namespaceURI="", prefix="", def __init__( self, tagName, namespaceURI="", prefix="",
...@@ -202,18 +241,18 @@ class Element( Node ): ...@@ -202,18 +241,18 @@ class Element( Node ):
self.namespaceURI=namespaceURI self.namespaceURI=namespaceURI
self.nodeValue=None self.nodeValue=None
self.__attrs={} # attributes are double-indexed: self._attrs={} # attributes are double-indexed:
self.__attrsNS={}# tagName -> Attribute self._attrsNS={}# tagName -> Attribute
# URI,localName -> Attribute # URI,localName -> Attribute
# in the future: consider lazy generation of attribute objects # in the future: consider lazy generation of attribute objects
# this is too tricky for now because of headaches # this is too tricky for now because of headaches
# with namespaces. # with namespaces.
def getAttribute( self, attname ): def getAttribute( self, attname ):
return self.__attrs[attname].value return self._attrs[attname].value
def getAttributeNS( self, namespaceURI, localName ): def getAttributeNS( self, namespaceURI, localName ):
return self.__attrsNS[(namespaceURI, localName)].value return self._attrsNS[(namespaceURI, localName)].value
def setAttribute( self, attname, value ): def setAttribute( self, attname, value ):
attr=Attr( attname ) attr=Attr( attname )
...@@ -222,26 +261,37 @@ class Element( Node ): ...@@ -222,26 +261,37 @@ class Element( Node ):
self.setAttributeNode( attr ) self.setAttributeNode( attr )
def setAttributeNS( self, namespaceURI, qualifiedName, value ): def setAttributeNS( self, namespaceURI, qualifiedName, value ):
attr=createAttributeNS( namespaceURI, qualifiedName ) prefix,localname=_nssplit( qualifiedName )
# for performance # for performance
attr = Attr( qualifiedName, namespaceURI, localname, prefix )
attr.__dict__["value"]=attr.__dict__["nodeValue"]=value attr.__dict__["value"]=attr.__dict__["nodeValue"]=value
self.setAttributeNode( attr ) self.setAttributeNode( attr )
def getAttributeNode( self, attrname ):
return self._attrs.get( attrname )
def getAttributeNodeNS( self, namespaceURI, localName ):
return self._attrsNS[(namespaceURI, localName)]
def setAttributeNode( self, attr ): def setAttributeNode( self, attr ):
self.__attrs[attr.name]=attr old=self._attrs.get( attr.name, None)
self.__attrsNS[(attr.namespaceURI,attr.localName)]=attr if old:
old.unlink()
self._attrs[attr.name]=attr
self._attrsNS[(attr.namespaceURI,attr.localName)]=attr
def removeAttribute( self, name ): def removeAttribute( self, name ):
attr = self.__attrs[name] attr = self._attrs[name]
self.removeAttributeNode( attr ) self.removeAttributeNode( attr )
def removeAttributeNS( self, namespaceURI, localName ): def removeAttributeNS( self, namespaceURI, localName ):
attr = self.__attrsNS[(uri, localName)] attr = self._attrsNS[(namespaceURI, localName)]
self.removeAttributeNode( attr ) self.removeAttributeNode( attr )
def removeAttributeNode( self, node ): def removeAttributeNode( self, node ):
del self.__attrs[node.name] node.unlink()
del self.__attrsNS[(node.namespaceURI, node.localName)] del self._attrs[node.name]
del self._attrsNS[(node.namespaceURI, node.localName)]
def getElementsByTagName( self, name ): def getElementsByTagName( self, name ):
return _getElementsByTagNameHelper( self, name, [] ) return _getElementsByTagNameHelper( self, name, [] )
...@@ -271,7 +321,7 @@ class Element( Node ): ...@@ -271,7 +321,7 @@ class Element( Node ):
writer.write("/>") writer.write("/>")
def _get_attributes( self ): def _get_attributes( self ):
return AttributeList( self.__attrs, self.__attrsNS ) return AttributeList( self._attrs, self._attrsNS )
class Comment( Node ): class Comment( Node ):
nodeType=Node.COMMENT_NODE nodeType=Node.COMMENT_NODE
...@@ -313,15 +363,30 @@ class Text( Node ): ...@@ -313,15 +363,30 @@ class Text( Node ):
def writexml( self, writer ): def writexml( self, writer ):
_write_data( writer, self.data ) _write_data( writer, self.data )
def _nssplit( qualifiedName ):
fields = string.split(qualifiedName, ':')
if len(fields) == 2:
return fields
elif len(fields) == 1:
return( '', fields[0] )
class Document( Node ): class Document( Node ):
nodeType=Node.DOCUMENT_NODE nodeType=Node.DOCUMENT_NODE
documentElement=None
def __init__( self ): def __init__( self ):
Node.__init__( self ) Node.__init__( self )
self.documentElement=None
self.attributes=None self.attributes=None
self.nodeName="#document" self.nodeName="#document"
self.nodeValue=None self.nodeValue=None
def appendChild( self, node ):
if node.nodeType==Node.ELEMENT_NODE and self.documentElement:
raise TypeError, "Two document elements disallowed"
else:
self.documentElement=node
Node.appendChild( self, node )
return node
createElement=Element createElement=Element
createTextNode=Text createTextNode=Text
...@@ -333,32 +398,16 @@ class Document( Node ): ...@@ -333,32 +398,16 @@ class Document( Node ):
createAttribute=Attr createAttribute=Attr
def createElementNS(self, namespaceURI, qualifiedName): def createElementNS(self, namespaceURI, qualifiedName):
fields = string.split(qualifiedName, ':') prefix,localName=_nssplit( qualifiedName )
if len(fields) == 2: return Element(qualifiedName, namespaceURI, prefix, localName)
prefix = fields[0]
localName = fields[1]
elif len(fields) == 1:
prefix = ''
localName = fields[0]
return Element(self, qualifiedName, namespaceURI, prefix, localName)
def createAttributeNS(self, namespaceURI, qualifiedName): def createAttributeNS(self, namespaceURI, qualifiedName):
fields = string.split(qualifiedName,':') prefix,localName=_nssplit( qualifiedName )
if len(fields) == 2: return Attr(namespaceURI, qualifiedName, localName, prefix)
localName = fields[1]
prefix = fields[0]
elif len(fields) == 1:
localName = fields[0]
prefix = None
return Attr(qualifiedName, namespaceURI, prefix, localName)
def getElementsByTagNameNS(self,namespaceURI,localName): def getElementsByTagNameNS(self,namespaceURI,localName):
_getElementsByTagNameNSHelper( self, namespaceURI, localName ) _getElementsByTagNameNSHelper( self, namespaceURI, localName )
def close( self ):
for node in self.elements:
_closeElement( node )
def unlink( self ): def unlink( self ):
self.documentElement=None self.documentElement=None
Node.unlink( self ) Node.unlink( self )
......
...@@ -2,7 +2,6 @@ import minidom ...@@ -2,7 +2,6 @@ import minidom
import types import types
import string import string
import sys import sys
import pyexpat
from xml.sax import ExpatParser from xml.sax import ExpatParser
#todo: SAX2/namespace handling #todo: SAX2/namespace handling
...@@ -140,12 +139,8 @@ class DOMEventStream: ...@@ -140,12 +139,8 @@ class DOMEventStream:
if cur_node is node: return if cur_node is node: return
if token !=END_ELEMENT: if token !=END_ELEMENT:
cur_node.parentNode.childNodes.append( cur_node ) cur_node.parentNode.appendChild( cur_node )
event=self.getEvent() event=self.getEvent()
if node.nodeType==minidom.Node.DOCUMENT_NODE:
for child in node.childNodes:
if child.nodeType==minidom.Node.ELEMENT_NODE:
node.documentElement=child
def getEvent( self ): def getEvent( self ):
if not self.pulldom.firstEvent[1]: if not self.pulldom.firstEvent[1]:
...@@ -193,75 +188,7 @@ def parseString( string, parser=None ): ...@@ -193,75 +188,7 @@ def parseString( string, parser=None ):
stringio=StringIO.StringIO stringio=StringIO.StringIO
bufsize=len( string ) bufsize=len( string )
stringio( string ) buf=stringio( string )
parser=_getParser() parser=_getParser()
return DOMEventStream( buf, parser, bufsize ) return DOMEventStream( buf, parser, bufsize )
#FIXME: Use Lars' instead!!!
class SAX_expat:
"SAX driver for the Pyexpat C module."
def __init__(self):
self.parser=pyexpat.ParserCreate()
self.started=0
def setDocumentHandler( self, handler ):
self.parser.StartElementHandler = handler.startElement
self.parser.EndElementHandler = handler.endElement
self.parser.CharacterDataHandler = handler.datachars
self.parser.ProcessingInstructionHandler = handler.processingInstruction
self.doc_handler=handler
def setErrorHandler( self, handler ):
self.err_handler=handler
# --- Locator methods. Only usable after errors.
def getLineNumber(self):
return self.parser.ErrorLineNumber
def getColumnNumber(self):
return self.parser.ErrorColumnNumber
# --- Internal
def __report_error(self):
msg=pyexpat.ErrorString(self.parser.ErrorCode)
self.err_handler.fatalError(msg)
# --- EXPERIMENTAL PYTHON SAX EXTENSIONS
def get_parser_name(self):
return "pyexpat"
def get_parser_version(self):
return "Unknown"
def get_driver_version(self):
return version
def is_validating(self):
return 0
def is_dtd_reading(self):
return 0
def reset(self):
self.parser=pyexpat.ParserCreate()
self.parser.StartElementHandler = self.startElement
self.parser.EndElementHandler = self.endElement
self.parser.CharacterDataHandler = self.characters
self.parser.ProcessingInstructionHandler = self.processingInstruction
def feed(self,data):
if not self.started:
self.doc_handler.startDocument()
self.started=1
if not self.parser.Parse(data):
self.__report_error()
def close(self):
if not self.parser.Parse("",1):
self.__report_error()
self.doc_handler.endDocument()
self.parser = None
...@@ -23,3 +23,27 @@ from _exceptions import * ...@@ -23,3 +23,27 @@ from _exceptions import *
from saxutils import * from saxutils import *
from _exceptions import SAXParseException from _exceptions import SAXParseException
import xmlreader import xmlreader
def parse( filename_or_stream, handler, errorHandler=ErrorHandler() ):
parser=ExpatParser()
parser.setContentHandler( handler )
parse.setErrorHandler( errorHandler )
parser.parse( filename_or_stream )
# this may not work yet...Expat doesn't handle buffer inputs
def parseString( string, handler, errorHandler=ErrorHandler() ):
try:
import cStringIO
stringio=cStringIO.StringIO
except ImportError:
import StringIO
stringio=StringIO.StringIO
bufsize=len( string )
buf=stringio( string )
parser=ExpatParser()
parser.setContentHandler( handler )
parse.setErrorHandler( errorHandler )
parser.parse( buf )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment