Commit 0705610b authored by Victor Stinner

Issue #8383: pickle and pickletools use the surrogatepass error handler when
encoding unicode as UTF-8, to support lone surrogates and stay compatible with
Python 2.x and 3.0.
parent 45cbf940
......@@ -499,7 +499,7 @@ class _Pickler:
def save_str(self, obj, pack=struct.pack):
if self.bin:
encoded = obj.encode('utf-8')
encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded)
else:
......@@ -966,7 +966,7 @@ class _Unpickler:
def load_binunicode(self):
len = mloads(b'i' + self.read(4))
self.append(str(self.read(len), 'utf-8'))
self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self):
......
......@@ -469,7 +469,7 @@ def read_unicodestring4(f):
raise ValueError("unicodestring4 byte count < 0: %d" % n)
data = f.read(n)
if len(data) == n:
return str(data, 'utf-8')
return str(data, 'utf-8', 'surrogatepass')
raise ValueError("expected %d bytes in a unicodestring4, but only %d "
"remain" % (n, len(data)))
......
......@@ -515,7 +515,9 @@ class AbstractPickleTests(unittest.TestCase):
def test_unicode(self):
endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
'<\\>', '<\\\U00012345>']
'<\\>', '<\\\U00012345>',
# surrogates
'<\udc80>']
for proto in protocols:
for u in endcases:
p = self.dumps(u, proto)
......
......@@ -312,6 +312,10 @@ C-API
Library
-------
- Issue #8383: pickle and pickletools use the surrogatepass error handler when
encoding unicode as UTF-8, to support lone surrogates and stay compatible with
Python 2.x and 3.0.
- Issue #7585: difflib context and unified diffs now place a tab between
filename and date, conforming to the 'standards' they were originally
designed to follow. This improves compatibility with patch tools.
......
......@@ -1227,7 +1227,9 @@ save_unicode(PicklerObject *self, PyObject *obj)
if (self->bin) {
char pdata[5];
encoded = PyUnicode_AsUTF8String(obj);
encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
PyUnicode_GET_SIZE(obj),
"surrogatepass");
if (encoded == NULL)
goto error;
......@@ -3352,7 +3354,7 @@ load_binunicode(UnpicklerObject *self)
if (unpickler_read(self, &s, size) < 0)
return -1;
str = PyUnicode_DecodeUTF8(s, size, NULL);
str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
if (str == NULL)
return -1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment