Commit 7fd7e36b authored by Andrew M. Kuchling

Change pyexpat test suite to exercise the .returns_unicode attribute,
parsing the sample data once with 8-bit strings and once with Unicode.
parent beba056c
test_pyexpat test_pyexpat
PI: PI:
xml-stylesheet href="stylesheet.css" 'xml-stylesheet' 'href="stylesheet.css"'
Comment: Comment:
' comment data ' ' comment data '
Notation declared: ('notation', None, 'notation.jpeg', None) Notation declared: ('notation', None, 'notation.jpeg', None)
Unparsed entity decl: Unparsed entity decl:
('unparsed_entity', None, 'entity.file', None, 'notation') ('unparsed_entity', None, 'entity.file', None, 'notation')
Start element: Start element:
root {} 'root' {'attr1': 'value1', 'attr2': 'value2\341\275\200'}
NS decl: NS decl:
myns http://www.python.org/namespace 'myns' 'http://www.python.org/namespace'
Start element: Start element:
http://www.python.org/namespace!subelement {} 'http://www.python.org/namespace!subelement' {}
Character data: Character data:
'Contents of subelements' 'Contents of subelements'
End element: End element:
http://www.python.org/namespace!subelement 'http://www.python.org/namespace!subelement'
End of NS decl: End of NS decl:
myns 'myns'
Start element: Start element:
sub2 {} 'sub2' {}
Start of CDATA section Start of CDATA section
Character data: Character data:
'contents of CDATA section' 'contents of CDATA section'
End of CDATA section End of CDATA section
End element: End element:
sub2 'sub2'
External entity ref: http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace external_entity None entity.file None External entity ref: ('http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, 'entity.file', None)
End element: End element:
root 'root'
PI:
u'xml-stylesheet' u'href="stylesheet.css"'
Comment:
u' comment data '
Notation declared: (u'notation', None, u'notation.jpeg', None)
Unparsed entity decl:
(u'unparsed_entity', None, u'entity.file', None, u'notation')
Start element:
u'root' {u'attr1': u'value1', u'attr2': u'value2\u1F40'}
NS decl:
u'myns' u'http://www.python.org/namespace'
Start element:
u'http://www.python.org/namespace!subelement' {}
Character data:
u'Contents of subelements'
End element:
u'http://www.python.org/namespace!subelement'
End of NS decl:
u'myns'
Start element:
u'sub2' {}
Start of CDATA section
Character data:
u'contents of CDATA section'
End of CDATA section
End element:
u'sub2'
External entity ref: (u'http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, u'entity.file', None)
End element:
u'root'
PI:
u'xml-stylesheet' u'href="stylesheet.css"'
Comment:
u' comment data '
Notation declared: (u'notation', None, u'notation.jpeg', None)
Unparsed entity decl:
(u'unparsed_entity', None, u'entity.file', None, u'notation')
Start element:
u'root' {u'attr1': u'value1', u'attr2': u'value2\u1F40'}
NS decl:
u'myns' u'http://www.python.org/namespace'
Start element:
u'http://www.python.org/namespace!subelement' {}
Character data:
u'Contents of subelements'
End element:
u'http://www.python.org/namespace!subelement'
End of NS decl:
u'myns'
Start element:
u'sub2' {}
Start of CDATA section
Character data:
u'contents of CDATA section'
End of CDATA section
End element:
u'sub2'
External entity ref: (u'http://www.python.org/namespace=http://www.w3.org/XML/1998/namespace\014external_entity', None, u'entity.file', None)
End element:
u'root'
...@@ -10,10 +10,10 @@ import pyexpat ...@@ -10,10 +10,10 @@ import pyexpat
class Outputter: class Outputter:
def StartElementHandler(self, name, attrs): def StartElementHandler(self, name, attrs):
print 'Start element:\n\t', name, attrs print 'Start element:\n\t', repr(name), attrs
def EndElementHandler(self, name): def EndElementHandler(self, name):
print 'End element:\n\t', name print 'End element:\n\t', repr(name)
def CharacterDataHandler(self, data): def CharacterDataHandler(self, data):
data = string.strip(data) data = string.strip(data)
...@@ -22,13 +22,13 @@ class Outputter: ...@@ -22,13 +22,13 @@ class Outputter:
print '\t', repr(data) print '\t', repr(data)
def ProcessingInstructionHandler(self, target, data): def ProcessingInstructionHandler(self, target, data):
print 'PI:\n\t', target, data print 'PI:\n\t', repr(target), repr(data)
def StartNamespaceDeclHandler(self, prefix, uri): def StartNamespaceDeclHandler(self, prefix, uri):
print 'NS decl:\n\t', prefix, uri print 'NS decl:\n\t', repr(prefix), repr(uri)
def EndNamespaceDeclHandler(self, prefix): def EndNamespaceDeclHandler(self, prefix):
print 'End of NS decl:\n\t', prefix print 'End of NS decl:\n\t', repr(prefix)
def StartCdataSectionHandler(self): def StartCdataSectionHandler(self):
print 'Start of CDATA section' print 'Start of CDATA section'
...@@ -51,8 +51,9 @@ class Outputter: ...@@ -51,8 +51,9 @@ class Outputter:
print 'Not standalone' print 'Not standalone'
return 1 return 1
def ExternalEntityRefHandler(self, context, base, sysId, pubId): def ExternalEntityRefHandler(self, *args):
print 'External entity ref:', context, base, sysId, pubId context, base, sysId, pubId = args
print 'External entity ref:', args
return 1 return 1
def DefaultHandler(self, userData): def DefaultHandler(self, userData):
...@@ -64,7 +65,14 @@ class Outputter: ...@@ -64,7 +65,14 @@ class Outputter:
out = Outputter() out = Outputter()
parser = pyexpat.ParserCreate(namespace_separator='!') parser = pyexpat.ParserCreate(namespace_separator='!')
for name in ['StartElementHandler', 'EndElementHandler',
# Test getting/setting returns_unicode
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
parser.returns_unicode = 1 ; assert parser.returns_unicode == 1
parser.returns_unicode = 2 ; assert parser.returns_unicode == 1
parser.returns_unicode = 0 ; assert parser.returns_unicode == 0
HANDLER_NAMES = ['StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler', 'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler', 'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
...@@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler', ...@@ -73,7 +81,8 @@ for name in ['StartElementHandler', 'EndElementHandler',
'DefaultHandler', 'DefaultHandlerExpand', 'DefaultHandler', 'DefaultHandlerExpand',
#'NotStandaloneHandler', #'NotStandaloneHandler',
'ExternalEntityRefHandler' 'ExternalEntityRefHandler'
]: ]
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) ) setattr(parser, name, getattr(out, name) )
data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
...@@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> ...@@ -88,7 +97,7 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
%unparsed_entity; %unparsed_entity;
]> ]>
<root> <root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace"> <myns:subelement xmlns:myns="http://www.python.org/namespace">
Contents of subelements Contents of subelements
</myns:subelement> </myns:subelement>
...@@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> ...@@ -97,6 +106,8 @@ data = """<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
</root> </root>
""" """
# Produce UTF-8 output
parser.returns_unicode = 0
try: try:
parser.Parse(data, 1) parser.Parse(data, 1)
except pyexpat.error: except pyexpat.error:
...@@ -105,3 +116,33 @@ except pyexpat.error: ...@@ -105,3 +116,33 @@ except pyexpat.error:
print '** Column', parser.ErrorColumnNumber print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex print '** Byte', parser.ErrorByteIndex
# Try the parse again, this time producing Unicode output
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
try:
parser.Parse(data, 1)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try parsing a file
parser = pyexpat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
setattr(parser, name, getattr(out, name) )
import StringIO
file = StringIO.StringIO(data)
try:
parser.ParseFile(file)
except pyexpat.error:
print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment