Commit 966322f9 authored by Alexandre Vassalotti

Merged revisions 67934-67935 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r67934 | alexandre.vassalotti | 2008-12-27 02:08:47 -0500 (Sat, 27 Dec 2008) | 4 lines

  Fix issue #4730: cPickle corrupts high-unicode strings.
  Update outdated copy of PyUnicode_EncodeRawUnicodeEscape.
  Add a test case.
........
  r67935 | alexandre.vassalotti | 2008-12-27 02:13:01 -0500 (Sat, 27 Dec 2008) | 2 lines

  Add Misc/NEWS entry for r67934.
........
parent 6a00c742
...@@ -480,14 +480,21 @@ class AbstractPickleTests(unittest.TestCase): ...@@ -480,14 +480,21 @@ class AbstractPickleTests(unittest.TestCase):
if have_unicode: if have_unicode:
def test_unicode(self): def test_unicode(self):
endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'), endcases = [u'', u'<\\u>', u'<\\\\u1234>', u'<\n>',
unicode('<\n>'), unicode('<\\>')] u'<\\>', u'<\\\\U00012345>']
for proto in protocols: for proto in protocols:
for u in endcases: for u in endcases:
p = self.dumps(u, proto) p = self.dumps(u, proto)
u2 = self.loads(p) u2 = self.loads(p)
self.assertEqual(u2, u) self.assertEqual(u2, u)
def test_unicode_high_plane(self):
t = u'\U00012345'
for proto in protocols:
p = self.dumps(t, proto)
t2 = self.loads(p)
self.assertEqual(t2, t)
def test_ints(self): def test_ints(self):
import sys import sys
for proto in protocols: for proto in protocols:
......
...@@ -173,6 +173,9 @@ Library ...@@ -173,6 +173,9 @@ Library
- Issue #4014: Don't claim that Python has an Alpha release status, in addition - Issue #4014: Don't claim that Python has an Alpha release status, in addition
to claiming it is Mature. to claiming it is Mature.
- Issue #4730: Fixed the cPickle module to correctly handle astral characters
when protocol 0 is used.
Build Build
----- -----
......
...@@ -1255,41 +1255,90 @@ save_string(Picklerobject *self, PyObject *args, int doput) ...@@ -1255,41 +1255,90 @@ save_string(Picklerobject *self, PyObject *args, int doput)
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
backslash and newline characters to \uXXXX escapes. */ backslash and newline characters to \uXXXX escapes. */
static PyObject * static PyObject *
modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size) modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size)
{ {
PyObject *repr; PyObject *repr;
char *p; char *p;
char *q; char *q;
static const char *hexdigit = "0123456789ABCDEF"; static const char *hexdigit = "0123456789abcdef";
#ifdef Py_UNICODE_WIDE
const Py_ssize_t expandsize = 10;
#else
const Py_ssize_t expandsize = 6;
#endif
repr = PyString_FromStringAndSize(NULL, 6 * size); if (size > PY_SSIZE_T_MAX / expandsize)
if (repr == NULL) return PyErr_NoMemory();
return NULL;
if (size == 0) repr = PyString_FromStringAndSize(NULL, expandsize * size);
return repr; if (repr == NULL)
return NULL;
p = q = PyString_AS_STRING(repr); if (size == 0)
while (size-- > 0) {
Py_UNICODE ch = *s++;
/* Map 16-bit characters to '\uxxxx' */
if (ch >= 256 || ch == '\\' || ch == '\n') {
*p++ = '\\';
*p++ = 'u';
*p++ = hexdigit[(ch >> 12) & 0xf];
*p++ = hexdigit[(ch >> 8) & 0xf];
*p++ = hexdigit[(ch >> 4) & 0xf];
*p++ = hexdigit[ch & 15];
}
/* Copy everything else as-is */
else
*p++ = (char) ch;
}
*p = '\0';
_PyString_Resize(&repr, p - q);
return repr; return repr;
}
p = q = PyString_AS_STRING(repr);
while (size-- > 0) {
Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
/* Map 32-bit characters to '\Uxxxxxxxx' */
if (ch >= 0x10000) {
*p++ = '\\';
*p++ = 'U';
*p++ = hexdigit[(ch >> 28) & 0xf];
*p++ = hexdigit[(ch >> 24) & 0xf];
*p++ = hexdigit[(ch >> 20) & 0xf];
*p++ = hexdigit[(ch >> 16) & 0xf];
*p++ = hexdigit[(ch >> 12) & 0xf];
*p++ = hexdigit[(ch >> 8) & 0xf];
*p++ = hexdigit[(ch >> 4) & 0xf];
*p++ = hexdigit[ch & 15];
}
else
#else
/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
if (ch >= 0xD800 && ch < 0xDC00) {
Py_UNICODE ch2;
Py_UCS4 ucs;
ch2 = *s++;
size--;
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
*p++ = '\\';
*p++ = 'U';
*p++ = hexdigit[(ucs >> 28) & 0xf];
*p++ = hexdigit[(ucs >> 24) & 0xf];
*p++ = hexdigit[(ucs >> 20) & 0xf];
*p++ = hexdigit[(ucs >> 16) & 0xf];
*p++ = hexdigit[(ucs >> 12) & 0xf];
*p++ = hexdigit[(ucs >> 8) & 0xf];
*p++ = hexdigit[(ucs >> 4) & 0xf];
*p++ = hexdigit[ucs & 0xf];
continue;
}
/* Fall through: isolated surrogates are escaped as '\uxxxx' below */
s--;
size++;
}
#endif
/* Map 16-bit characters to '\uxxxx' */
if (ch >= 256 || ch == '\\' || ch == '\n') {
*p++ = '\\';
*p++ = 'u';
*p++ = hexdigit[(ch >> 12) & 0xf];
*p++ = hexdigit[(ch >> 8) & 0xf];
*p++ = hexdigit[(ch >> 4) & 0xf];
*p++ = hexdigit[ch & 15];
}
/* Copy everything else as-is */
else
*p++ = (char) ch;
}
*p = '\0';
_PyString_Resize(&repr, p - q);
return repr;
}
static int static int
save_unicode(Picklerobject *self, PyObject *args, int doput) save_unicode(Picklerobject *self, PyObject *args, int doput)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment