Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
61de087f
Commit
61de087f
authored
Apr 02, 2015
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #2175: SAX parsers now support a character stream of InputSource object.
parent
278ba269
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
64 additions
and
12 deletions
+64
-12
Doc/library/xml.sax.reader.rst
Doc/library/xml.sax.reader.rst
+6
-6
Doc/whatsnew/3.5.rst
Doc/whatsnew/3.5.rst
+7
-0
Lib/test/test_sax.py
Lib/test/test_sax.py
+33
-0
Lib/xml/sax/expatreader.py
Lib/xml/sax/expatreader.py
+8
-3
Lib/xml/sax/saxutils.py
Lib/xml/sax/saxutils.py
+5
-2
Lib/xml/sax/xmlreader.py
Lib/xml/sax/xmlreader.py
+3
-1
Misc/NEWS
Misc/NEWS
+2
-0
No files found.
Doc/library/xml.sax.reader.rst
View file @
61de087f
...
...
@@ -100,8 +100,10 @@ The :class:`XMLReader` interface supports the following methods:
system identifier (a string identifying the input source -- typically a file
name or a URL), a file-like object, or an :class:`InputSource` object. When
:meth:`parse` returns, the input is completely processed, and the parser object
can be discarded or reset. As a limitation, the current implementation only
accepts byte streams; processing of character streams is for further study.
can be discarded or reset.
.. versionchanged:: 3.5
Added support for character streams.
.. method:: XMLReader.getContentHandler()
...
...
@@ -288,8 +290,7 @@ InputSource Objects
.. method:: InputSource.setByteStream(bytefile)
Set the byte stream (a Python file-like object which does not perform
byte-to-character conversion) for this input source.
Set the byte stream (a :term:`binary file`) for this input source.
The SAX parser will ignore this if there is also a character stream specified,
but it will use a byte stream in preference to opening a URI connection itself.
...
...
@@ -308,8 +309,7 @@ InputSource Objects
.. method:: InputSource.setCharacterStream(charfile)
Set the character stream for this input source. (The stream must be a Python 1.6
Unicode-wrapped file-like that performs conversion to strings.)
Set the character stream (a :term:`text file`) for this input source.
If there is a character stream specified, the SAX parser will ignore any byte
stream and will not attempt to open a URI connection to the system identifier.
...
...
Doc/whatsnew/3.5.rst
View file @
61de087f
...
...
@@ -499,6 +499,13 @@ xmlrpc
* :class:`xmlrpc.client.ServerProxy` is now a :term:`context manager`.
(Contributed by Claudiu Popa in :issue:`20627`.)
xml.sax
-------
* SAX parsers now support a character stream of the
:class:`~xml.sax.xmlreader.InputSource` object.
(Contributed by Serhiy Storchaka in :issue:`2175`.)
faulthandler
------------
...
...
Lib/test/test_sax.py
View file @
61de087f
...
...
@@ -185,12 +185,24 @@ class PrepareInputSourceTest(unittest.TestCase):
def
make_byte_stream
(
self
):
return
BytesIO
(
b"This is a byte stream."
)
def
make_character_stream
(
self
):
return
StringIO
(
"This is a character stream."
)
def
checkContent
(
self
,
stream
,
content
):
self
.
assertIsNotNone
(
stream
)
self
.
assertEqual
(
stream
.
read
(),
content
)
stream
.
close
()
def
test_character_stream
(
self
):
# If the source is an InputSource with a character stream, use it.
src
=
InputSource
(
self
.
file
)
src
.
setCharacterStream
(
self
.
make_character_stream
())
prep
=
prepare_input_source
(
src
)
self
.
assertIsNone
(
prep
.
getByteStream
())
self
.
checkContent
(
prep
.
getCharacterStream
(),
"This is a character stream."
)
def
test_byte_stream
(
self
):
# If the source is an InputSource that does not have a character
# stream but does have a byte stream, use the byte stream.
...
...
@@ -225,6 +237,14 @@ class PrepareInputSourceTest(unittest.TestCase):
self
.
checkContent
(
prep
.
getByteStream
(),
b"This is a byte stream."
)
def
test_text_file
(
self
):
# If the source is a text file-like object, use it as a character
# stream.
prep
=
prepare_input_source
(
self
.
make_character_stream
())
self
.
assertIsNone
(
prep
.
getByteStream
())
self
.
checkContent
(
prep
.
getCharacterStream
(),
"This is a character stream."
)
# ===== XMLGenerator
...
...
@@ -904,6 +924,19 @@ class ExpatReaderTest(XmlTestBase):
self
.
assertEqual
(
result
.
getvalue
(),
xml_test_out
)
def
test_expat_inpsource_character_stream
(
self
):
parser
=
create_parser
()
result
=
BytesIO
()
xmlgen
=
XMLGenerator
(
result
)
parser
.
setContentHandler
(
xmlgen
)
inpsrc
=
InputSource
()
with
open
(
TEST_XMLFILE
,
'rt'
,
encoding
=
'iso-8859-1'
)
as
f
:
inpsrc
.
setCharacterStream
(
f
)
parser
.
parse
(
inpsrc
)
self
.
assertEqual
(
result
.
getvalue
(),
xml_test_out
)
# ===== IncrementalParser support
def
test_expat_incremental
(
self
):
...
...
Lib/xml/sax/expatreader.py
View file @
61de087f
...
...
@@ -219,9 +219,14 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self
.
_parsing
=
0
# break cycle created by expat handlers pointing to our methods
self
.
_parser
=
None
bs
=
self
.
_source
.
getByteStream
()
if
bs
is
not
None
:
bs
.
close
()
try
:
file
=
self
.
_source
.
getCharacterStream
()
if
file
is
not
None
:
file
.
close
()
finally
:
file
=
self
.
_source
.
getByteStream
()
if
file
is
not
None
:
file
.
close
()
def
_reset_cont_handler
(
self
):
self
.
_parser
.
ProcessingInstructionHandler
=
\
...
...
Lib/xml/sax/saxutils.py
View file @
61de087f
...
...
@@ -345,11 +345,14 @@ def prepare_input_source(source, base=""):
elif
hasattr
(
source
,
"read"
):
f
=
source
source
=
xmlreader
.
InputSource
()
source
.
setByteStream
(
f
)
if
isinstance
(
f
.
read
(
0
),
str
):
source
.
setCharacterStream
(
f
)
else
:
source
.
setByteStream
(
f
)
if
hasattr
(
f
,
"name"
)
and
isinstance
(
f
.
name
,
str
):
source
.
setSystemId
(
f
.
name
)
if
source
.
getByteStream
()
is
None
:
if
source
.
get
CharacterStream
()
is
None
and
source
.
get
ByteStream
()
is
None
:
sysid
=
source
.
getSystemId
()
basehead
=
os
.
path
.
dirname
(
os
.
path
.
normpath
(
base
))
sysidfilename
=
os
.
path
.
join
(
basehead
,
sysid
)
...
...
Lib/xml/sax/xmlreader.py
View file @
61de087f
...
...
@@ -117,7 +117,9 @@ class IncrementalParser(XMLReader):
source
=
saxutils
.
prepare_input_source
(
source
)
self
.
prepareParser
(
source
)
file
=
source
.
getByteStream
()
file
=
source
.
getCharacterStream
()
if
file
is
None
:
file
=
source
.
getByteStream
()
buffer
=
file
.
read
(
self
.
_bufsize
)
while
buffer
:
self
.
feed
(
buffer
)
...
...
Misc/NEWS
View file @
61de087f
...
...
@@ -16,6 +16,8 @@ Core and Builtins
Library
-------
- Issue #2175: SAX parsers now support a character stream of InputSource object.
- Issue #16840: Tkinter now supports 64-bit integers added in Tcl 8.4 and
arbitrary precision integers added in Tcl 8.5.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment