Commit d8331de5 authored by Andrew M. Kuchling

Patch 1137: allow assigning to .buffer_size attribute of PyExpat.parser objects

parent ae797269
......@@ -182,11 +182,15 @@ XMLParser Objects
.. attribute:: xmlparser.buffer_size
The size of the buffer used when :attr:`buffer_text` is true. This value cannot
be changed at this time.
The size of the buffer used when :attr:`buffer_text` is true.
A new buffer size can be set by assigning a new integer value
to this attribute.
When the size is changed, the buffer will be flushed.
.. versionadded:: 2.3
.. versionchanged:: 2.6
The buffer size can now be changed.
.. attribute:: xmlparser.buffer_text
......
......@@ -875,6 +875,13 @@ complete list of changes, or look through the CVS logs for all the details.
changed and :const:`UF_APPEND` to indicate that data can only be appended to the
file. (Contributed by M. Levinson.)
* The :mod:`pyexpat` module's :class:`Parser` objects now allow setting
their :attr:`buffer_size` attribute to change the size of the buffer
used to hold character data.
(Contributed by Achim Gaedke.)
.. Patch 1137
* The :mod:`random` module's :class:`Random` objects can
now be pickled on a 32-bit system and unpickled on a 64-bit
system, and vice versa. Unfortunately, this change also means
......
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
import StringIO
import StringIO, sys
import unittest
import pyexpat
......@@ -434,6 +434,131 @@ class sf1296433Test(unittest.TestCase):
self.assertRaises(Exception, parser.Parse, xml)
class ChardataBufferTest(unittest.TestCase):
"""
test setting of chardata buffer size
"""
def test_1025_bytes(self):
self.assertEquals(self.small_buffer_test(1025), 2)
def test_1000_bytes(self):
self.assertEquals(self.small_buffer_test(1000), 1)
def test_wrong_size(self):
parser = expat.ParserCreate()
parser.buffer_text = 1
def f(size):
parser.buffer_size = size
self.assertRaises(TypeError, f, sys.maxint+1)
self.assertRaises(ValueError, f, -1)
self.assertRaises(ValueError, f, 0)
def test_unchanged_size(self):
xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
xml2 = 'a'*512 + '</s>'
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_size = 512
parser.buffer_text = 1
# Feed 512 bytes of character data: the handler should be called
# once.
self.n = 0
parser.Parse(xml1)
self.assertEquals(self.n, 1)
# Reassign to buffer_size, but assign the same size.
parser.buffer_size = parser.buffer_size
self.assertEquals(self.n, 1)
# Try parsing rest of the document
parser.Parse(xml2)
self.assertEquals(self.n, 2)
def test_disabling_buffer(self):
xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
xml2 = ('b' * 1024)
xml3 = "%s</a>" % ('c' * 1024)
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
parser.buffer_size = 1024
self.assertEquals(parser.buffer_size, 1024)
# Parse one chunk of XML
self.n = 0
parser.Parse(xml1, 0)
self.assertEquals(parser.buffer_size, 1024)
self.assertEquals(self.n, 1)
# Turn off buffering and parse the next chunk.
parser.buffer_text = 0
self.assertFalse(parser.buffer_text)
self.assertEquals(parser.buffer_size, 1024)
for i in range(10):
parser.Parse(xml2, 0)
self.assertEquals(self.n, 11)
parser.buffer_text = 1
self.assertTrue(parser.buffer_text)
self.assertEquals(parser.buffer_size, 1024)
parser.Parse(xml3, 1)
self.assertEquals(self.n, 12)
def make_document(self, bytes):
return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
def counting_handler(self, text):
self.n += 1
def small_buffer_test(self, buffer_len):
xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_size = 1024
parser.buffer_text = 1
self.n = 0
parser.Parse(xml)
return self.n
def test_change_size_1(self):
xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
parser.buffer_size = 1024
self.assertEquals(parser.buffer_size, 1024)
self.n = 0
parser.Parse(xml1, 0)
parser.buffer_size *= 2
self.assertEquals(parser.buffer_size, 2048)
parser.Parse(xml2, 1)
self.assertEquals(self.n, 2)
def test_change_size_2(self):
xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
parser.buffer_size = 2048
self.assertEquals(parser.buffer_size, 2048)
self.n=0
parser.Parse(xml1, 0)
parser.buffer_size /= 2
self.assertEquals(parser.buffer_size, 1024)
parser.Parse(xml2, 1)
self.assertEquals(self.n, 4)
def test_main():
run_unittest(SetAttributeTest,
......@@ -443,7 +568,8 @@ def test_main():
BufferTextTest,
HandlerExceptionTest,
PositionTest,
sf1296433Test)
sf1296433Test,
ChardataBufferTest)
if __name__ == "__main__":
test_main()
......@@ -225,6 +225,7 @@ Gyro Funch
Peter Funk
Geoff Furnish
Ulisses Furquim
Achim Gaedke
Lele Gaifax
Santiago Gala
Yitzchak Gale
......
......@@ -348,6 +348,9 @@ Core and builtins
Library
-------
- pyexpat, patch #1137: allow setting buffer_size attribute
on Parser objects to set the character data buffer size.
- Issue #1757: The hash of a Decimal instance is no longer affected by
the current context.
......
......@@ -1649,6 +1649,50 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
self->specified_attributes = 0;
return 0;
}
if (strcmp(name, "buffer_size") == 0) {
    /* Assignment to the buffer_size attribute: validate the requested
     * size, flush any character data still held in the current buffer,
     * then replace the buffer with one of the new size.
     * Returns 0 on success and -1 (with an exception set) on failure.
     */
    long new_buffer_size;
    if (!PyInt_Check(v)) {
        PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
        return -1;
    }
    new_buffer_size = PyInt_AS_LONG(v);
    /* trivial case -- no change */
    if (new_buffer_size == self->buffer_size) {
        return 0;
    }
    if (new_buffer_size <= 0) {
        PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
        return -1;
    }
    /* check maximum */
    if (new_buffer_size > INT_MAX) {
        char errmsg[100];
        sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
        PyErr_SetString(PyExc_ValueError, errmsg);
        return -1;
    }
    if (self->buffer != NULL) {
        /* there is already a buffer */
        if (self->buffer_used != 0) {
            /* Deliver pending data before the buffer goes away.  The
             * flush invokes the Python-level character data handler;
             * NOTE(review): a negative result is taken to mean the
             * handler failed with an exception set -- confirm against
             * flush_character_buffer().  Propagate that failure instead
             * of silently reporting success.
             */
            if (flush_character_buffer(self) < 0) {
                return -1;
            }
        }
        /* free existing buffer */
        free(self->buffer);
    }
    self->buffer = malloc(new_buffer_size);
    if (self->buffer == NULL) {
        /* buffer_size keeps its previous value when allocation fails */
        PyErr_NoMemory();
        return -1;
    }
    self->buffer_size = new_buffer_size;
    return 0;
}
if (strcmp(name, "CharacterDataHandler") == 0) {
/* If we're changing the character data handler, flush all
* cached data with the old handler. Not sure there's a
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment