Commit 7fd7e36b authored by Andrew M. Kuchling's avatar Andrew M. Kuchling

Change pyexpat test suite to exercise the .returns_unicode attribute,

parsing the sample data once with 8-bit strings and once with Unicode.
parent beba056c
test_pyexpat
PI:
xml-stylesheet href="stylesheet.css"
'xml-stylesheet' 'href="stylesheet.css"'
Comment:
' comment data '
Notation declared: ('notation', None, 'notation.jpeg', None)
Unparsed entity decl:
('unparsed_entity', None, 'entity.file', None, 'notation')
Start element:
root {}
'root' {'attr1': 'value1', 'attr2': 'value2\341\275\200'}
NS decl:
myns http://www.python.org/namespace
'myns' 'http://www.python.org/namespace'
Start element:
http://www.python.org/namespace!subelement {}
'http://www.python.org/namespace!subelement' {}
Character data:
'Contents of subelements'
End element:
http://www.python.org/namespace!subelement
'http://www.python.org/namespace!subelement'
End of NS decl:
myns
'myns'
Start element:
sub2 {}
'sub2' {}
Start of CDATA section
Character data:
'contents of CDATA section'
End of CDATA section
End element:
sub2
External entity ref: http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace external_entity None entity.file None
'sub2'
External entity ref: ('http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, 'entity.file', None)
End element:
root
'root'
PI:
u'xml-stylesheet' u'href="stylesheet.css"'
Comment:
u' comment data '
Notation declared: (u'notation', None, u'notation.jpeg', None)
Unparsed entity decl:
(u'unparsed_entity', None, u'entity.file', None, u'notation')
Start element:
u'root' {u'attr1': u'value1', u'attr2': u'value2\u1F40'}
NS decl:
u'myns' u'http://www.python.org/namespace'
Start element:
u'http://www.python.org/namespace!subelement' {}
Character data:
u'Contents of subelements'
End element:
u'http://www.python.org/namespace!subelement'
End of NS decl:
u'myns'
Start element:
u'sub2' {}
Start of CDATA section
Character data:
u'contents of CDATA section'
End of CDATA section
End element:
u'sub2'
External entity ref: (u'http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, u'entity.file', None)
End element:
u'root'
PI:
u'xml-stylesheet' u'href="stylesheet.css"'
Comment:
u' comment data '
Notation declared: (u'notation', None, u'notation.jpeg', None)
Unparsed entity decl:
(u'unparsed_entity', None, u'entity.file', None, u'notation')
Start element:
u'root' {u'attr1': u'value1', u'attr2': u'value2\u1F40'}
NS decl:
u'myns' u'http://www.python.org/namespace'
Start element:
u'http://www.python.org/namespace!subelement' {}
Character data:
u'Contents of subelements'
End element:
u'http://www.python.org/namespace!subelement'
End of NS decl:
u'myns'
Start element:
u'sub2' {}
Start of CDATA section
Character data:
u'contents of CDATA section'
End of CDATA section
End element:
u'sub2'
External entity ref: (u'http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, u'entity.file', None)
End element:
u'root'
......@@ -10,10 +10,10 @@ import pyexpat
class Outputter:
def StartElementHandler(self, name, attrs):
print 'Start element:\n\t', name, attrs
print 'Start element:\n\t', repr(name), attrs
def EndElementHandler(self, name):
print 'End element:\n\t', name
print 'End element:\n\t', repr(name)
def CharacterDataHandler(self, data):
data = string.strip(data)
......@@ -22,13 +22,13 @@ class Outputter:
print '\t', repr(data)
def ProcessingInstructionHandler(self, target, data):
print 'PI:\n\t', target, data
print 'PI:\n\t', repr(target), repr(data)
def StartNamespaceDeclHandler(self, prefix, uri):
print 'NS decl:\n\t', prefix, uri
print 'NS decl:\n\t', repr(prefix), repr(uri)
def EndNamespaceDeclHandler(self, prefix):
print 'End of NS decl:\n\t', prefix
print 'End of NS decl:\n\t', repr(prefix)
def StartCdataSectionHandler(self):
print 'Start of CDATA section'
......@@ -51,8 +51,9 @@ class Outputter:
print 'Not standalone'
return 1
def ExternalEntityRefHandler(self, context, base, sysId, pubId):
print 'External entity ref:', context, base, sysId, pubId
def ExternalEntityRefHandler(self, *args):
context, base, sysId, pubId = args
print 'External entity ref:', args
return 1
def DefaultHandler(self, userData):
......@@ -64,7 +65,14 @@ class Outputter:
out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!')
for name in ['StartElementHandler', 'EndElementHandler',
# Test getting/setting returns_unicode
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
parser.returns_unicode = 1 ; assert parser.returns_unicode == 1
parser.returns_unicode = 2 ; assert parser.returns_unicode == 1
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
HANDLER_NAMES = ['StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
......@@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler',
'DefaultHandler', 'DefaultHandlerExpand',
#'NotStandaloneHandler',
'ExternalEntityRefHandler'
]:
]
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
......@@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
%unparsed_entity;
]>
<root>
<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements
</myns:subelement>
......@@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
</root>
"""
# Produce UTF-8 output
parser.returns_unicode = 0
try:
parser.Parse(data, 1)
except pyexpat.error:
......@@ -105,3 +116,33 @@ except pyexpat.error:
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try the parse again, this time producing Unicode output
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
try:
parser.Parse(data, 1)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try parsing a file
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
import StringIO
file = StringIO.StringIO(data)
try:
parser.ParseFile(file)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment