Commit cd632a66 authored by Kirill Smelkov

golang_str: Fix bstr.decode to handle 'string-escape' codec properly

On py2, str.decode('string-escape') returns str, not unicode, and this
property is actually used and relied upon by Lib/pickle.py:

https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Lib/pickle.py#L967-L977

We promised that bstr would be a drop-in replacement for str on py2, so let's
adjust its behaviour to match the original, because if we do not,
unpickling strings will break when str is replaced by bstr under
gpython.

Do not add bstr.encode yet; wait until we hit a real case where it is actually used.
parent e4cbdfae
...@@ -422,6 +422,9 @@ class pybstr(bytes): ...@@ -422,6 +422,9 @@ class pybstr(bytes):
x = _utf8_decode_surrogateescape(self) x = _utf8_decode_surrogateescape(self)
else: else:
x = bytes.decode(self, encoding, errors) x = bytes.decode(self, encoding, errors)
# on py2 e.g. bytes.decode('string-escape') returns bytes
if PY_MAJOR_VERSION < 3 and isinstance(x, bytes):
return pyb(x)
return pyu(x) return pyu(x)
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
......
...@@ -690,6 +690,17 @@ def test_strings_encodedecode(): ...@@ -690,6 +690,17 @@ def test_strings_encodedecode():
with raises(UnicodeEncodeError): with raises(UnicodeEncodeError):
u_k8mir.encode('ascii') u_k8mir.encode('ascii')
# on py2 there are encodings for which bytes.decode returns bytes
# e.g. bytes.decode('string-escape') is actually used by pickle
# verify that this exact semantic is preserved
if six.PY3:
with raises(LookupError): bs.decode('hex')
with raises(LookupError): bs.decode('string-escape')
else:
_ = bs.decode('string-escape'); assert type(_) is bstr; assert _ == bs
_ = b(r'x\'y').decode('string-escape'); assert type(_) is bstr; assert _bdata(_) == b"x'y"
_ = b('616263').decode('hex'); assert type(_) is bstr; assert _bdata(_) == b"abc"
# verify string operations like `x * 3` for all cases from bytes, bytearray, unicode, bstr and ustr. # verify string operations like `x * 3` for all cases from bytes, bytearray, unicode, bstr and ustr.
@mark.parametrize('tx', (bytes, unicode, bytearray, bstr, ustr)) @mark.parametrize('tx', (bytes, unicode, bytearray, bstr, ustr))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment