Commit 56817db5 authored by Amaury Forgeot d'Arc

The incremental decoder for utf-7 must preserve its state between calls.

Solves issue1460.

Might not be a backport candidate: a new API function was added,
and some code may rely on details in utf-7.py.
parent 64f1ff23
...@@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( ...@@ -674,6 +674,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
const char *errors /* error handling */ const char *errors /* error handling */
); );
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
const char *string, /* UTF-7 encoded string */
Py_ssize_t length, /* size of string */
const char *errors, /* error handling */
Py_ssize_t *consumed /* bytes consumed */
);
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */ const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
......
...@@ -6,34 +6,31 @@ import codecs ...@@ -6,34 +6,31 @@ import codecs
### Codec APIs ### Codec APIs
class Codec(codecs.Codec): encode = codecs.utf_7_encode
# Note: Binding these as C functions will result in the class not def decode(input, errors='strict'):
# converting them to methods. This is intended. return codecs.utf_7_decode(input, errors, True)
encode = codecs.utf_7_encode
decode = codecs.utf_7_decode
class IncrementalEncoder(codecs.IncrementalEncoder): class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False): def encode(self, input, final=False):
return codecs.utf_7_encode(input, self.errors)[0] return codecs.utf_7_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.BufferedIncrementalDecoder): class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, input, errors, final): _buffer_decode = codecs.utf_7_decode
return codecs.utf_7_decode(input, self.errors)
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(codecs.StreamWriter):
pass encode = codecs.utf_7_encode
class StreamReader(Codec,codecs.StreamReader): class StreamReader(codecs.StreamReader):
pass decode = codecs.utf_7_decode
### encodings module API ### encodings module API
def getregentry(): def getregentry():
return codecs.CodecInfo( return codecs.CodecInfo(
name='utf-7', name='utf-7',
encode=Codec.encode, encode=encode,
decode=Codec.decode, decode=decode,
incrementalencoder=IncrementalEncoder, incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder, incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader, streamreader=StreamReader,
......
...@@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase): ...@@ -51,7 +51,7 @@ class ReadTest(unittest.TestCase):
self.assertEqual(d.decode("", True), u"") self.assertEqual(d.decode("", True), u"")
self.assertEqual(d.buffer, "") self.assertEqual(d.buffer, "")
# Check whether the rest method works properly # Check whether the reset method works properly
d.reset() d.reset()
result = u"" result = u""
for (c, partialresult) in zip(input.encode(self.encoding), partialresults): for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
...@@ -491,7 +491,17 @@ class UTF8Test(ReadTest): ...@@ -491,7 +491,17 @@ class UTF8Test(ReadTest):
class UTF7Test(ReadTest): class UTF7Test(ReadTest):
encoding = "utf-7" encoding = "utf-7"
# No test_partial() yet, because UTF-7 doesn't support it. def test_partial(self):
self.check_partial(
u"a+-b",
[
u"a",
u"a",
u"a+",
u"a+-",
u"a+-b",
]
)
class UTF16ExTest(unittest.TestCase): class UTF16ExTest(unittest.TestCase):
......
...@@ -230,18 +230,25 @@ unicode_internal_decode(PyObject *self, ...@@ -230,18 +230,25 @@ unicode_internal_decode(PyObject *self,
static PyObject * static PyObject *
utf_7_decode(PyObject *self, utf_7_decode(PyObject *self,
PyObject *args) PyObject *args)
{ {
const char *data; const char *data;
Py_ssize_t size; Py_ssize_t size;
const char *errors = NULL; const char *errors = NULL;
int final = 0;
Py_ssize_t consumed;
PyObject *decoded = NULL;
if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode", if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
&data, &size, &errors)) &data, &size, &errors, &final))
return NULL; return NULL;
consumed = size;
return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors), decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
size); final ? NULL : &consumed);
if (decoded == NULL)
return NULL;
return codec_tuple(decoded, consumed);
} }
static PyObject * static PyObject *
......
...@@ -943,6 +943,14 @@ char utf7_special[128] = { ...@@ -943,6 +943,14 @@ char utf7_special[128] = {
PyObject *PyUnicode_DecodeUTF7(const char *s, PyObject *PyUnicode_DecodeUTF7(const char *s,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors)
{
return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
}
PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{ {
const char *starts = s; const char *starts = s;
Py_ssize_t startinpos; Py_ssize_t startinpos;
...@@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -962,8 +970,11 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
unicode = _PyUnicode_New(size); unicode = _PyUnicode_New(size);
if (!unicode) if (!unicode)
return NULL; return NULL;
if (size == 0) if (size == 0) {
if (consumed)
*consumed = 0;
return (PyObject *)unicode; return (PyObject *)unicode;
}
p = unicode->str; p = unicode->str;
e = s + size; e = s + size;
...@@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -1049,7 +1060,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
goto onError; goto onError;
} }
if (inShift) { if (inShift && !consumed) {
outpos = p-PyUnicode_AS_UNICODE(unicode); outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = size; endinpos = size;
if (unicode_decode_call_errorhandler( if (unicode_decode_call_errorhandler(
...@@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -1061,6 +1072,12 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
if (s < e) if (s < e)
goto restart; goto restart;
} }
if (consumed) {
if(inShift)
*consumed = startinpos;
else
*consumed = s-starts;
}
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0) if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
goto onError; goto onError;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment