Commit ea5abe71 authored by Kirill Smelkov

golang_str: Move py3/py2 conditioning into _utf8_{encode,decode}_surrogateescape

This way those routines can simply be called and do what is expected,
without the caller caring whether it is py2 or py3. We will soon need to
use those routines from several call sites, and having that py2/py3
conditioning spread over all usage places would be inconvenient.

/reviewed-by @jerome
/reviewed-at nexedi/pygolang!18
parent 50b8cb7e
...@@ -43,13 +43,7 @@ def pyb(s): # -> bytes ...@@ -43,13 +43,7 @@ def pyb(s): # -> bytes
if isinstance(s, bytes): # py2: str py3: bytes if isinstance(s, bytes): # py2: str py3: bytes
pass pass
elif isinstance(s, unicode): # py2: unicode py3: str elif isinstance(s, unicode): # py2: unicode py3: str
if PY_MAJOR_VERSION >= 3: s = _utf8_encode_surrogateescape(s)
s = s.encode('UTF-8', 'surrogateescape')
else:
# py2 does not have surrogateescape error handler, and even if we
# provide one, builtin unicode.encode() does not treat
# \udc80-\udcff as error. -> Do the encoding ourselves.
s = _utf8_encode_surrogateescape(s)
else: else:
raise TypeError("b: invalid type %s" % type(s)) raise TypeError("b: invalid type %s" % type(s))
...@@ -76,13 +70,7 @@ def pyu(s): # -> unicode ...@@ -76,13 +70,7 @@ def pyu(s): # -> unicode
if isinstance(s, unicode): # py2: unicode py3: str if isinstance(s, unicode): # py2: unicode py3: str
pass pass
elif isinstance(s, bytes): # py2: str py3: bytes elif isinstance(s, bytes): # py2: str py3: bytes
if PY_MAJOR_VERSION >= 3: s = _utf8_decode_surrogateescape(s)
s = s.decode('UTF-8', 'surrogateescape')
else:
# py2 does not have surrogateescape error handler, and even if we
# provide one, builtin bytes.decode() does not treat surrogate
# sequences as error. -> Do the decoding ourselves.
s = _utf8_decode_surrogateescape(s)
else: else:
raise TypeError("u: invalid type %s" % type(s)) raise TypeError("u: invalid type %s" % type(s))
...@@ -243,6 +231,12 @@ def _utf8_decode_rune(s): ...@@ -243,6 +231,12 @@ def _utf8_decode_rune(s):
# _utf8_decode_surrogateescape mimics s.decode('utf-8', 'surrogateescape') from py3. # _utf8_decode_surrogateescape mimics s.decode('utf-8', 'surrogateescape') from py3.
def _utf8_decode_surrogateescape(s): # -> unicode def _utf8_decode_surrogateescape(s): # -> unicode
assert isinstance(s, bytes) assert isinstance(s, bytes)
if PY_MAJOR_VERSION >= 3:
return s.decode('UTF-8', 'surrogateescape')
# py2 does not have surrogateescape error handler, and even if we
# provide one, builtin bytes.decode() does not treat surrogate
# sequences as error. -> Do the decoding ourselves.
outv = [] outv = []
emit = outv.append emit = outv.append
...@@ -276,6 +270,12 @@ def _utf8_decode_surrogateescape(s): # -> unicode ...@@ -276,6 +270,12 @@ def _utf8_decode_surrogateescape(s): # -> unicode
# _utf8_encode_surrogateescape mimics s.encode('utf-8', 'surrogateescape') from py3. # _utf8_encode_surrogateescape mimics s.encode('utf-8', 'surrogateescape') from py3.
def _utf8_encode_surrogateescape(s): # -> bytes def _utf8_encode_surrogateescape(s): # -> bytes
assert isinstance(s, unicode) assert isinstance(s, unicode)
if PY_MAJOR_VERSION >= 3:
return s.encode('UTF-8', 'surrogateescape')
# py2 does not have surrogateescape error handler, and even if we
# provide one, builtin unicode.encode() does not treat
# \udc80-\udcff as error. -> Do the encoding ourselves.
outv = [] outv = []
emit = outv.append emit = outv.append
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment