Commit 513be11e authored by Kirill Smelkov's avatar Kirill Smelkov

X bstr: bytes -> xbytes, unicode -> xunicode

Builtin types bytes and unicode will be patched.
xbytes and xunicode will refer to original unpatched types.
parent 300d7dfa
...@@ -54,6 +54,10 @@ cdef extern from "Python.h": ...@@ -54,6 +54,10 @@ cdef extern from "Python.h":
object (*sq_slice) (object, Py_ssize_t, Py_ssize_t) # present only on py2 object (*sq_slice) (object, Py_ssize_t, Py_ssize_t) # present only on py2
PyTypeObject PyUnicode_Type
PyTypeObject PyBytes_Type
from libc.stdint cimport uint8_t from libc.stdint cimport uint8_t
from libc.stdio cimport FILE from libc.stdio cimport FILE
...@@ -68,6 +72,10 @@ else: ...@@ -68,6 +72,10 @@ else:
import copy_reg as pycopyreg import copy_reg as pycopyreg
cdef object xunicode = <object>(&PyUnicode_Type) # always points to std unicode type
cdef object xbytes = <object>(&PyBytes_Type) # ----//---- std bytes type
def pyb(s): # -> bstr def pyb(s): # -> bstr
"""b converts object to bstr. """b converts object to bstr.
...@@ -117,37 +125,37 @@ cdef _pyb(bcls, s): # -> ~bstr | None ...@@ -117,37 +125,37 @@ cdef _pyb(bcls, s): # -> ~bstr | None
if type(s) is bcls: if type(s) is bcls:
return s return s
if isinstance(s, bytes): if isinstance(s, xbytes):
if type(s) is not bytes: if type(s) is not xbytes:
s = _bdata(s) s = _bdata(s)
elif isinstance(s, unicode): elif isinstance(s, xunicode):
s = _utf8_encode_surrogateescape(s) s = _utf8_encode_surrogateescape(s)
else: else:
s = _ifbuffer_data(s) # bytearray and buffer s = _ifbuffer_data(s) # bytearray and buffer
if s is None: if s is None:
return None return None
assert type(s) is bytes assert type(s) is bytes # XXX not `is xbytes`
return bytes.__new__(bcls, s) return xbytes.__new__(bcls, s)
cdef _pyu(ucls, s): # -> ~ustr | None cdef _pyu(ucls, s): # -> ~ustr | None
if type(s) is ucls: if type(s) is ucls:
return s return s
if isinstance(s, unicode): if isinstance(s, xunicode):
if type(s) is not unicode: if type(s) is not xunicode:
s = _udata(s) s = _udata(s)
else: else:
_ = _ifbuffer_data(s) # bytearray and buffer _ = _ifbuffer_data(s) # bytearray and buffer
if _ is not None: if _ is not None:
s = _ s = _
if isinstance(s, bytes): if isinstance(s, xbytes):
s = _utf8_decode_surrogateescape(s) s = _utf8_decode_surrogateescape(s)
else: else:
return None return None
assert type(s) is unicode assert type(s) is unicode # XXX not `is xunicode`
return unicode.__new__(ucls, s) return xunicode.__new__(ucls, s)
# _ifbuffer_data returns contained data if obj provides buffer interface. # _ifbuffer_data returns contained data if obj provides buffer interface.
cdef _ifbuffer_data(obj): # -> bytes|None cdef _ifbuffer_data(obj): # -> bytes|None
...@@ -165,18 +173,18 @@ cdef _ifbuffer_data(obj): # -> bytes|None ...@@ -165,18 +173,18 @@ cdef _ifbuffer_data(obj): # -> bytes|None
# _pyb_coerce coerces x from `b op x` to be used in operation with pyb. # _pyb_coerce coerces x from `b op x` to be used in operation with pyb.
cdef _pyb_coerce(x): # -> bstr|bytes cdef _pyb_coerce(x): # -> bstr|bytes
if isinstance(x, bytes): if isinstance(x, xbytes):
return x return x
elif isinstance(x, (unicode, bytearray)): elif isinstance(x, (xunicode, bytearray)):
return pyb(x) return pyb(x)
else: else:
raise TypeError("b: coerce: invalid type %s" % type(x)) raise TypeError("b: coerce: invalid type %s" % type(x))
# _pyu_coerce coerces x from `u op x` to be used in operation with pyu. # _pyu_coerce coerces x from `u op x` to be used in operation with pyu.
cdef _pyu_coerce(x): # -> ustr|unicode cdef _pyu_coerce(x): # -> ustr|unicode
if isinstance(x, unicode): if isinstance(x, xunicode):
return x return x
elif isinstance(x, (bytes, bytearray)): elif isinstance(x, (xbytes, bytearray)):
return pyu(x) return pyu(x)
else: else:
raise TypeError("u: coerce: invalid type %s" % type(x)) raise TypeError("u: coerce: invalid type %s" % type(x))
...@@ -184,9 +192,9 @@ cdef _pyu_coerce(x): # -> ustr|unicode ...@@ -184,9 +192,9 @@ cdef _pyu_coerce(x): # -> ustr|unicode
# _pybu_rcoerce coerces x from `x op b|u` to either bstr or ustr. # _pybu_rcoerce coerces x from `x op b|u` to either bstr or ustr.
# NOTE bytearray is handled outside of this function. # NOTE bytearray is handled outside of this function.
cdef _pybu_rcoerce(x): # -> bstr|ustr cdef _pybu_rcoerce(x): # -> bstr|ustr
if isinstance(x, bytes): if isinstance(x, xbytes):
return pyb(x) return pyb(x)
elif isinstance(x, unicode): elif isinstance(x, xunicode):
return pyu(x) return pyu(x)
else: else:
raise TypeError('b/u: coerce: invalid type %s' % type(x)) raise TypeError('b/u: coerce: invalid type %s' % type(x))
...@@ -264,7 +272,7 @@ class pybstr(bytes): ...@@ -264,7 +272,7 @@ class pybstr(bytes):
# _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented
object = _bstringify(object) object = _bstringify(object)
assert isinstance(object, (unicode, bytes)), object assert isinstance(object, (xunicode, xbytes)), object
bobj = _pyb(cls, object) bobj = _pyb(cls, object)
assert bobj is not None assert bobj is not None
return bobj return bobj
...@@ -296,7 +304,7 @@ class pybstr(bytes): ...@@ -296,7 +304,7 @@ class pybstr(bytes):
# retrieve state, which gives bstr, not bytes. Fix state to be bytes ourselves. # retrieve state, which gives bstr, not bytes. Fix state to be bytes ourselves.
def __reduce_ex__(self, protocol): def __reduce_ex__(self, protocol):
if protocol >= 2: if protocol >= 2:
return bytes.__reduce_ex__(self, protocol) return xbytes.__reduce_ex__(self, protocol)
return ( return (
pycopyreg._reconstructor, pycopyreg._reconstructor,
(self.__class__, self.__class__, _bdata(self)) (self.__class__, self.__class__, _bdata(self))
...@@ -311,7 +319,7 @@ class pybstr(bytes): ...@@ -311,7 +319,7 @@ class pybstr(bytes):
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
return hash(pyu(self)) return hash(pyu(self))
else: else:
return bytes.__hash__(self) return xbytes.__hash__(self)
# == != < > <= >= # == != < > <= >=
# NOTE == and != are special: they must succeed against any type so that # NOTE == and != are special: they must succeed against any type so that
...@@ -321,18 +329,18 @@ class pybstr(bytes): ...@@ -321,18 +329,18 @@ class pybstr(bytes):
b = _pyb_coerce(b) b = _pyb_coerce(b)
except TypeError: except TypeError:
return False return False
return bytes.__eq__(a, b) return xbytes.__eq__(a, b)
def __ne__(a, b): return not a.__eq__(b) def __ne__(a, b): return not a.__eq__(b)
def __lt__(a, b): return bytes.__lt__(a, _pyb_coerce(b)) def __lt__(a, b): return xbytes.__lt__(a, _pyb_coerce(b))
def __gt__(a, b): return bytes.__gt__(a, _pyb_coerce(b)) def __gt__(a, b): return xbytes.__gt__(a, _pyb_coerce(b))
def __le__(a, b): return bytes.__le__(a, _pyb_coerce(b)) def __le__(a, b): return xbytes.__le__(a, _pyb_coerce(b))
def __ge__(a, b): return bytes.__ge__(a, _pyb_coerce(b)) def __ge__(a, b): return xbytes.__ge__(a, _pyb_coerce(b))
# len - no need to override # len - no need to override
# [], [:] # [], [:]
def __getitem__(self, idx): def __getitem__(self, idx):
x = bytes.__getitem__(self, idx) x = xbytes.__getitem__(self, idx)
if type(idx) is slice: if type(idx) is slice:
return pyb(x) return pyb(x)
else: else:
...@@ -353,7 +361,7 @@ class pybstr(bytes): ...@@ -353,7 +361,7 @@ class pybstr(bytes):
def __contains__(self, key): def __contains__(self, key):
# NOTE on py3 bytes.__contains__ accepts numbers and buffers. We don't want to # NOTE on py3 bytes.__contains__ accepts numbers and buffers. We don't want to
# automatically coerce any of them to bytestrings # automatically coerce any of them to bytestrings
return bytes.__contains__(self, _pyb_coerce(key)) return xbytes.__contains__(self, _pyb_coerce(key))
# __add__, __radd__ (no need to override __iadd__) # __add__, __radd__ (no need to override __iadd__)
...@@ -362,7 +370,7 @@ class pybstr(bytes): ...@@ -362,7 +370,7 @@ class pybstr(bytes):
# https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods # https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods
# but pybstr is currently _not_ cdef'ed class # but pybstr is currently _not_ cdef'ed class
# see also https://github.com/cython/cython/issues/4750 # see also https://github.com/cython/cython/issues/4750
return pyb(bytes.__add__(a, _pyb_coerce(b))) return pyb(xbytes.__add__(a, _pyb_coerce(b)))
def __radd__(b, a): def __radd__(b, a):
# a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr)
...@@ -377,7 +385,7 @@ class pybstr(bytes): ...@@ -377,7 +385,7 @@ class pybstr(bytes):
# __mul__, __rmul__ (no need to override __imul__) # __mul__, __rmul__ (no need to override __imul__)
def __mul__(a, b): def __mul__(a, b):
return pyb(bytes.__mul__(a, b)) return pyb(xbytes.__mul__(a, b))
def __rmul__(b, a): def __rmul__(b, a):
return b.__mul__(a) return b.__mul__(a)
...@@ -421,7 +429,7 @@ class pybstr(bytes): ...@@ -421,7 +429,7 @@ class pybstr(bytes):
if encoding == 'utf-8' and errors == 'surrogateescape': if encoding == 'utf-8' and errors == 'surrogateescape':
x = _utf8_decode_surrogateescape(self) x = _utf8_decode_surrogateescape(self)
else: else:
x = bytes.decode(self, encoding, errors) x = xbytes.decode(self, encoding, errors)
return pyu(x) return pyu(x)
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
...@@ -437,7 +445,7 @@ class pybstr(bytes): ...@@ -437,7 +445,7 @@ class pybstr(bytes):
def casefold(self): return pyb(pyu(self).casefold()) def casefold(self): return pyb(pyu(self).casefold())
def center(self, width, fillchar=' '): return pyb(pyu(self).center(width, fillchar)) def center(self, width, fillchar=' '): return pyb(pyu(self).center(width, fillchar))
def count(self, sub, start=None, end=None): return bytes.count(self, _pyb_coerce(sub), start, end) def count(self, sub, start=None, end=None): return xbytes.count(self, _pyb_coerce(sub), start, end)
def endswith(self, suffix, start=None, end=None): def endswith(self, suffix, start=None, end=None):
if isinstance(suffix, tuple): if isinstance(suffix, tuple):
...@@ -447,13 +455,13 @@ class pybstr(bytes): ...@@ -447,13 +455,13 @@ class pybstr(bytes):
return False return False
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return bytes.endswith(self, _pyb_coerce(suffix), start, end) return xbytes.endswith(self, _pyb_coerce(suffix), start, end)
def expandtabs(self, tabsize=8): return pyb(pyu(self).expandtabs(tabsize)) def expandtabs(self, tabsize=8): return pyb(pyu(self).expandtabs(tabsize))
# NOTE find/index & friends should return byte-position, not unicode-position # NOTE find/index & friends should return byte-position, not unicode-position
def find(self, sub, start=None, end=None): return bytes.find(self, _pyb_coerce(sub), start, end) def find(self, sub, start=None, end=None): return xbytes.find(self, _pyb_coerce(sub), start, end)
def index(self, sub, start=None, end=None): return bytes.index(self, _pyb_coerce(sub), start, end) def index(self, sub, start=None, end=None): return xbytes.index(self, _pyb_coerce(sub), start, end)
def isalnum(self): return pyu(self).isalnum() def isalnum(self): return pyu(self).isalnum()
def isalpha(self): return pyu(self).isalpha() def isalpha(self): return pyu(self).isalpha()
...@@ -469,23 +477,23 @@ class pybstr(bytes): ...@@ -469,23 +477,23 @@ class pybstr(bytes):
def isspace(self): return pyu(self).isspace() def isspace(self): return pyu(self).isspace()
def istitle(self): return pyu(self).istitle() def istitle(self): return pyu(self).istitle()
def join(self, iterable): return pyb(bytes.join(self, (_pyb_coerce(_) for _ in iterable))) def join(self, iterable): return pyb(xbytes.join(self, (_pyb_coerce(_) for _ in iterable)))
def ljust(self, width, fillchar=' '): return pyb(pyu(self).ljust(width, fillchar)) def ljust(self, width, fillchar=' '): return pyb(pyu(self).ljust(width, fillchar))
def lower(self): return pyb(pyu(self).lower()) def lower(self): return pyb(pyu(self).lower())
def lstrip(self, chars=None): return pyb(pyu(self).lstrip(chars)) def lstrip(self, chars=None): return pyb(pyu(self).lstrip(chars))
def partition(self, sep): return tuple(pyb(_) for _ in bytes.partition(self, _pyb_coerce(sep))) def partition(self, sep): return tuple(pyb(_) for _ in xbytes.partition(self, _pyb_coerce(sep)))
if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation
def removeprefix(self, prefix): return pyb(pyu(self).removeprefix(prefix)) def removeprefix(self, prefix): return pyb(pyu(self).removeprefix(prefix))
if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation
def removesuffix(self, suffix): return pyb(pyu(self).removesuffix(suffix)) def removesuffix(self, suffix): return pyb(pyu(self).removesuffix(suffix))
def replace(self, old, new, count=-1): return pyb(bytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count)) def replace(self, old, new, count=-1): return pyb(xbytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count))
# NOTE rfind/rindex & friends should return byte-position, not unicode-position # NOTE rfind/rindex & friends should return byte-position, not unicode-position
def rfind(self, sub, start=None, end=None): return bytes.rfind(self, _pyb_coerce(sub), start, end) def rfind(self, sub, start=None, end=None): return xbytes.rfind(self, _pyb_coerce(sub), start, end)
def rindex(self, sub, start=None, end=None): return bytes.rindex(self, _pyb_coerce(sub), start, end) def rindex(self, sub, start=None, end=None): return xbytes.rindex(self, _pyb_coerce(sub), start, end)
def rjust(self, width, fillchar=' '): return pyb(pyu(self).rjust(width, fillchar)) def rjust(self, width, fillchar=' '): return pyb(pyu(self).rjust(width, fillchar))
def rpartition(self, sep): return tuple(pyb(_) for _ in bytes.rpartition(self, _pyb_coerce(sep))) def rpartition(self, sep): return tuple(pyb(_) for _ in xbytes.rpartition(self, _pyb_coerce(sep)))
def rsplit(self, sep=None, maxsplit=-1): def rsplit(self, sep=None, maxsplit=-1):
v = pyu(self).rsplit(sep, maxsplit) v = pyu(self).rsplit(sep, maxsplit)
return list([pyb(_) for _ in v]) return list([pyb(_) for _ in v])
...@@ -503,16 +511,16 @@ class pybstr(bytes): ...@@ -503,16 +511,16 @@ class pybstr(bytes):
return False return False
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return bytes.startswith(self, _pyb_coerce(prefix), start, end) return xbytes.startswith(self, _pyb_coerce(prefix), start, end)
def strip(self, chars=None): return pyb(pyu(self).strip(chars)) def strip(self, chars=None): return pyb(pyu(self).strip(chars))
def swapcase(self): return pyb(pyu(self).swapcase()) def swapcase(self): return pyb(pyu(self).swapcase())
def title(self): return pyb(pyu(self).title()) def title(self): return pyb(pyu(self).title())
def translate(self, table, delete=None): def translate(self, table, delete=None):
# bytes mode (compatibility with str/py2) # bytes mode (compatibility with str/py2)
if table is None or isinstance(table, bytes) or delete is not None: if table is None or isinstance(table, xbytes) or delete is not None:
if delete is None: delete = b'' if delete is None: delete = b''
return pyb(bytes.translate(self, table, delete)) return pyb(xbytes.translate(self, table, delete))
# unicode mode # unicode mode
else: else:
return pyb(pyu(self).translate(table)) return pyb(pyu(self).translate(table))
...@@ -564,7 +572,7 @@ class pyustr(unicode): ...@@ -564,7 +572,7 @@ class pyustr(unicode):
# _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented
object = _bstringify(object) object = _bstringify(object)
assert isinstance(object, (unicode, bytes)), object assert isinstance(object, (xunicode, xbytes)), object
uobj = _pyu(cls, object) uobj = _pyu(cls, object)
assert uobj is not None assert uobj is not None
return uobj return uobj
...@@ -596,7 +604,7 @@ class pyustr(unicode): ...@@ -596,7 +604,7 @@ class pyustr(unicode):
# retrieve state, which gives ustr, not unicode. Fix state to be unicode ourselves. # retrieve state, which gives ustr, not unicode. Fix state to be unicode ourselves.
def __reduce_ex__(self, protocol): def __reduce_ex__(self, protocol):
if protocol >= 2: if protocol >= 2:
return unicode.__reduce_ex__(self, protocol) return xunicode.__reduce_ex__(self, protocol)
return ( return (
pycopyreg._reconstructor, pycopyreg._reconstructor,
(self.__class__, self.__class__, _udata(self)) (self.__class__, self.__class__, _udata(self))
...@@ -606,7 +614,7 @@ class pyustr(unicode): ...@@ -606,7 +614,7 @@ class pyustr(unicode):
def __hash__(self): def __hash__(self):
# see pybstr.__hash__ for why we stick to hash of current str # see pybstr.__hash__ for why we stick to hash of current str
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
return unicode.__hash__(self) return xunicode.__hash__(self)
else: else:
return hash(pyb(self)) return hash(pyb(self))
...@@ -618,23 +626,23 @@ class pyustr(unicode): ...@@ -618,23 +626,23 @@ class pyustr(unicode):
b = _pyu_coerce(b) b = _pyu_coerce(b)
except TypeError: except TypeError:
return False return False
return unicode.__eq__(a, b) return xunicode.__eq__(a, b)
def __ne__(a, b): return not a.__eq__(b) def __ne__(a, b): return not a.__eq__(b)
def __lt__(a, b): return unicode.__lt__(a, _pyu_coerce(b)) def __lt__(a, b): return xunicode.__lt__(a, _pyu_coerce(b))
def __gt__(a, b): return unicode.__gt__(a, _pyu_coerce(b)) def __gt__(a, b): return xunicode.__gt__(a, _pyu_coerce(b))
def __le__(a, b): return unicode.__le__(a, _pyu_coerce(b)) def __le__(a, b): return xunicode.__le__(a, _pyu_coerce(b))
def __ge__(a, b): return unicode.__ge__(a, _pyu_coerce(b)) def __ge__(a, b): return xunicode.__ge__(a, _pyu_coerce(b))
# len - no need to override # len - no need to override
# [], [:] # [], [:]
def __getitem__(self, idx): def __getitem__(self, idx):
return pyu(unicode.__getitem__(self, idx)) return pyu(xunicode.__getitem__(self, idx))
# __iter__ # __iter__
def __iter__(self): def __iter__(self):
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
return _pyustrIter(unicode.__iter__(self)) return _pyustrIter(xunicode.__iter__(self))
else: else:
# on python 2 unicode does not have .__iter__ # on python 2 unicode does not have .__iter__
return PySeqIter_New(self) return PySeqIter_New(self)
...@@ -642,7 +650,7 @@ class pyustr(unicode): ...@@ -642,7 +650,7 @@ class pyustr(unicode):
# __contains__ # __contains__
def __contains__(self, key): def __contains__(self, key):
return unicode.__contains__(self, _pyu_coerce(key)) return xunicode.__contains__(self, _pyu_coerce(key))
# __add__, __radd__ (no need to override __iadd__) # __add__, __radd__ (no need to override __iadd__)
...@@ -651,7 +659,7 @@ class pyustr(unicode): ...@@ -651,7 +659,7 @@ class pyustr(unicode):
# https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods # https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods
# but pyustr is currently _not_ cdef'ed class # but pyustr is currently _not_ cdef'ed class
# see also https://github.com/cython/cython/issues/4750 # see also https://github.com/cython/cython/issues/4750
return pyu(unicode.__add__(a, _pyu_coerce(b))) return pyu(xunicode.__add__(a, _pyu_coerce(b)))
def __radd__(b, a): def __radd__(b, a):
# a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr)
...@@ -668,7 +676,7 @@ class pyustr(unicode): ...@@ -668,7 +676,7 @@ class pyustr(unicode):
# __mul__, __rmul__ (no need to override __imul__) # __mul__, __rmul__ (no need to override __imul__)
def __mul__(a, b): def __mul__(a, b):
return pyu(unicode.__mul__(a, b)) return pyu(xunicode.__mul__(a, b))
def __rmul__(b, a): def __rmul__(b, a):
return b.__mul__(a) return b.__mul__(a)
...@@ -693,7 +701,7 @@ class pyustr(unicode): ...@@ -693,7 +701,7 @@ class pyustr(unicode):
# NOTE not e.g. `_bvformat(_pyu_coerce(format_spec), (self,))` because # NOTE not e.g. `_bvformat(_pyu_coerce(format_spec), (self,))` because
# the only format code that string.__format__ should support is # the only format code that string.__format__ should support is
# 's', not e.g. 'r'. # 's', not e.g. 'r'.
return pyu(unicode.__format__(self, format_spec)) return pyu(xunicode.__format__(self, format_spec))
# encode/decode # encode/decode
...@@ -708,7 +716,7 @@ class pyustr(unicode): ...@@ -708,7 +716,7 @@ class pyustr(unicode):
if encoding == 'utf-8' and errors == 'surrogateescape': if encoding == 'utf-8' and errors == 'surrogateescape':
x = _utf8_encode_surrogateescape(self) x = _utf8_encode_surrogateescape(self)
else: else:
x = unicode.encode(self, encoding, errors) x = xunicode.encode(self, encoding, errors)
return pyb(x) return pyb(x)
if PY_MAJOR_VERSION < 3: if PY_MAJOR_VERSION < 3:
...@@ -719,16 +727,16 @@ class pyustr(unicode): ...@@ -719,16 +727,16 @@ class pyustr(unicode):
# all other string methods # all other string methods
def capitalize(self): return pyu(unicode.capitalize(self)) def capitalize(self): return pyu(xunicode.capitalize(self))
if _strhas('casefold'): # py3.3 TODO provide fallback implementation if _strhas('casefold'): # py3.3 TODO provide fallback implementation
def casefold(self): return pyu(unicode.casefold(self)) def casefold(self): return pyu(xunicode.casefold(self))
def center(self, width, fillchar=' '): return pyu(unicode.center(self, width, _pyu_coerce(fillchar))) def center(self, width, fillchar=' '): return pyu(xunicode.center(self, width, _pyu_coerce(fillchar)))
def count(self, sub, start=None, end=None): def count(self, sub, start=None, end=None):
# cython optimizes unicode.count to directly call PyUnicode_Count - # cython optimizes unicode.count to directly call PyUnicode_Count -
# - cannot use None for start/stop https://github.com/cython/cython/issues/4737 # - cannot use None for start/stop https://github.com/cython/cython/issues/4737
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.count(self, _pyu_coerce(sub), start, end) return xunicode.count(self, _pyu_coerce(sub), start, end)
def endswith(self, suffix, start=None, end=None): def endswith(self, suffix, start=None, end=None):
if isinstance(suffix, tuple): if isinstance(suffix, tuple):
for _ in suffix: for _ in suffix:
...@@ -737,16 +745,16 @@ class pyustr(unicode): ...@@ -737,16 +745,16 @@ class pyustr(unicode):
return False return False
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.endswith(self, _pyu_coerce(suffix), start, end) return xunicode.endswith(self, _pyu_coerce(suffix), start, end)
def expandtabs(self, tabsize=8): return pyu(unicode.expandtabs(self, tabsize)) def expandtabs(self, tabsize=8): return pyu(xunicode.expandtabs(self, tabsize))
def find(self, sub, start=None, end=None): def find(self, sub, start=None, end=None):
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.find(self, _pyu_coerce(sub), start, end) return xunicode.find(self, _pyu_coerce(sub), start, end)
def index(self, sub, start=None, end=None): def index(self, sub, start=None, end=None):
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.index(self, _pyu_coerce(sub), start, end) return xunicode.index(self, _pyu_coerce(sub), start, end)
# isalnum(self) no need to override # isalnum(self) no need to override
# isalpha(self) no need to override # isalpha(self) no need to override
...@@ -760,43 +768,43 @@ class pyustr(unicode): ...@@ -760,43 +768,43 @@ class pyustr(unicode):
# isspace(self) no need to override # isspace(self) no need to override
# istitle(self) no need to override # istitle(self) no need to override
def join(self, iterable): return pyu(unicode.join(self, (_pyu_coerce(_) for _ in iterable))) def join(self, iterable): return pyu(xunicode.join(self, (_pyu_coerce(_) for _ in iterable)))
def ljust(self, width, fillchar=' '): return pyu(unicode.ljust(self, width, _pyu_coerce(fillchar))) def ljust(self, width, fillchar=' '): return pyu(xunicode.ljust(self, width, _pyu_coerce(fillchar)))
def lower(self): return pyu(unicode.lower(self)) def lower(self): return pyu(xunicode.lower(self))
def lstrip(self, chars=None): return pyu(unicode.lstrip(self, _xpyu_coerce(chars))) def lstrip(self, chars=None): return pyu(xunicode.lstrip(self, _xpyu_coerce(chars)))
def partition(self, sep): return tuple(pyu(_) for _ in unicode.partition(self, _pyu_coerce(sep))) def partition(self, sep): return tuple(pyu(_) for _ in xunicode.partition(self, _pyu_coerce(sep)))
if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation
def removeprefix(self, prefix): return pyu(unicode.removeprefix(self, _pyu_coerce(prefix))) def removeprefix(self, prefix): return pyu(xunicode.removeprefix(self, _pyu_coerce(prefix)))
if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation
def removesuffix(self, suffix): return pyu(unicode.removesuffix(self, _pyu_coerce(suffix))) def removesuffix(self, suffix): return pyu(xunicode.removesuffix(self, _pyu_coerce(suffix)))
def replace(self, old, new, count=-1): return pyu(unicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count)) def replace(self, old, new, count=-1): return pyu(xunicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count))
def rfind(self, sub, start=None, end=None): def rfind(self, sub, start=None, end=None):
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.rfind(self, _pyu_coerce(sub), start, end) return xunicode.rfind(self, _pyu_coerce(sub), start, end)
def rindex(self, sub, start=None, end=None): def rindex(self, sub, start=None, end=None):
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.rindex(self, _pyu_coerce(sub), start, end) return xunicode.rindex(self, _pyu_coerce(sub), start, end)
def rjust(self, width, fillchar=' '): return pyu(unicode.rjust(self, width, _pyu_coerce(fillchar))) def rjust(self, width, fillchar=' '): return pyu(xunicode.rjust(self, width, _pyu_coerce(fillchar)))
def rpartition(self, sep): return tuple(pyu(_) for _ in unicode.rpartition(self, _pyu_coerce(sep))) def rpartition(self, sep): return tuple(pyu(_) for _ in xunicode.rpartition(self, _pyu_coerce(sep)))
def rsplit(self, sep=None, maxsplit=-1): def rsplit(self, sep=None, maxsplit=-1):
v = unicode.rsplit(self, _xpyu_coerce(sep), maxsplit) v = xunicode.rsplit(self, _xpyu_coerce(sep), maxsplit)
return list([pyu(_) for _ in v]) return list([pyu(_) for _ in v])
def rstrip(self, chars=None): return pyu(unicode.rstrip(self, _xpyu_coerce(chars))) def rstrip(self, chars=None): return pyu(xunicode.rstrip(self, _xpyu_coerce(chars)))
def split(self, sep=None, maxsplit=-1): def split(self, sep=None, maxsplit=-1):
# cython optimizes unicode.split to directly call PyUnicode_Split - cannot use None for sep # cython optimizes unicode.split to directly call PyUnicode_Split - cannot use None for sep
# and cannot also use object=NULL https://github.com/cython/cython/issues/4737 # and cannot also use object=NULL https://github.com/cython/cython/issues/4737
if sep is None: if sep is None:
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
v = unicode.split(self, maxsplit=maxsplit) v = xunicode.split(self, maxsplit=maxsplit)
else: else:
# on py2 unicode.split does not accept keyword arguments # on py2 xunicode.split does not accept keyword arguments
v = _udata(self).split(None, maxsplit) v = _udata(self).split(None, maxsplit)
else: else:
v = unicode.split(self, _pyu_coerce(sep), maxsplit) v = xunicode.split(self, _pyu_coerce(sep), maxsplit)
return list([pyu(_) for _ in v]) return list([pyu(_) for _ in v])
def splitlines(self, keepends=False): return list(pyu(_) for _ in unicode.splitlines(self, keepends)) def splitlines(self, keepends=False): return list(pyu(_) for _ in xunicode.splitlines(self, keepends))
def startswith(self, prefix, start=None, end=None): def startswith(self, prefix, start=None, end=None):
if isinstance(prefix, tuple): if isinstance(prefix, tuple):
for _ in prefix: for _ in prefix:
...@@ -805,10 +813,10 @@ class pyustr(unicode): ...@@ -805,10 +813,10 @@ class pyustr(unicode):
return False return False
if start is None: start = 0 if start is None: start = 0
if end is None: end = PY_SSIZE_T_MAX if end is None: end = PY_SSIZE_T_MAX
return unicode.startswith(self, _pyu_coerce(prefix), start, end) return xunicode.startswith(self, _pyu_coerce(prefix), start, end)
def strip(self, chars=None): return pyu(unicode.strip(self, _xpyu_coerce(chars))) def strip(self, chars=None): return pyu(xunicode.strip(self, _xpyu_coerce(chars)))
def swapcase(self): return pyu(unicode.swapcase(self)) def swapcase(self): return pyu(xunicode.swapcase(self))
def title(self): return pyu(unicode.title(self)) def title(self): return pyu(xunicode.title(self))
def translate(self, table): def translate(self, table):
# unicode.translate does not accept bstr values # unicode.translate does not accept bstr values
...@@ -817,10 +825,10 @@ class pyustr(unicode): ...@@ -817,10 +825,10 @@ class pyustr(unicode):
if not isinstance(v, int): # either unicode ordinal, if not isinstance(v, int): # either unicode ordinal,
v = _xpyu_coerce(v) # character or None v = _xpyu_coerce(v) # character or None
t[k] = v t[k] = v
return pyu(unicode.translate(self, t)) return pyu(xunicode.translate(self, t))
def upper(self): return pyu(unicode.upper(self)) def upper(self): return pyu(xunicode.upper(self))
def zfill(self, width): return pyu(unicode.zfill(self, width)) def zfill(self, width): return pyu(xunicode.zfill(self, width))
@staticmethod @staticmethod
def maketrans(x=None, y=None, z=None): def maketrans(x=None, y=None, z=None):
...@@ -832,11 +840,11 @@ class pyustr(unicode): ...@@ -832,11 +840,11 @@ class pyustr(unicode):
if not isinstance(k, int): if not isinstance(k, int):
k = pyu(k) k = pyu(k)
_[k] = v _[k] = v
return unicode.maketrans(_) return xunicode.maketrans(_)
elif z is None: elif z is None:
return unicode.maketrans(pyu(x), pyu(y)) # std maketrans does not accept b return xunicode.maketrans(pyu(x), pyu(y)) # std maketrans does not accept b
else: else:
return unicode.maketrans(pyu(x), pyu(y), pyu(z)) # ----//---- return xunicode.maketrans(pyu(x), pyu(y), pyu(z)) # ----//----
# hand-made on py2 # hand-made on py2
t = {} t = {}
...@@ -875,19 +883,19 @@ cdef class _pyustrIter: ...@@ -875,19 +883,19 @@ cdef class _pyustrIter:
# _bdata/_udata retrieve raw data from bytes/unicode. # _bdata/_udata retrieve raw data from bytes/unicode.
def _bdata(obj): # -> bytes def _bdata(obj): # -> bytes
assert isinstance(obj, bytes) assert isinstance(obj, xbytes)
_ = obj.__getnewargs__()[0] # (`bytes-data`,) _ = obj.__getnewargs__()[0] # (`bytes-data`,)
assert type(_) is bytes assert type(_) is xbytes
return _ return _
""" """
bcopy = bytes(memoryview(obj)) bcopy = xbytes(memoryview(obj))
assert type(bcopy) is bytes assert type(bcopy) is bytes
return bcopy return bcopy
""" """
def _udata(obj): # -> unicode def _udata(obj): # -> unicode
assert isinstance(obj, unicode) assert isinstance(obj, xunicode)
_ = obj.__getnewargs__()[0] # (`unicode-data`,) _ = obj.__getnewargs__()[0] # (`unicode-data`,)
assert type(_) is unicode assert type(_) is xunicode
return _ return _
""" """
cdef Py_UNICODE* u = PyUnicode_AsUnicode(obj) cdef Py_UNICODE* u = PyUnicode_AsUnicode(obj)
...@@ -1590,7 +1598,7 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr ...@@ -1590,7 +1598,7 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr
#print('--> __mod__ ', repr(fmt1), ' % ', repr(arg)) #print('--> __mod__ ', repr(fmt1), ' % ', repr(arg))
try: try:
s = bytes.__mod__(fmt1, arg) s = xbytes.__mod__(fmt1, arg)
except ValueError as e: except ValueError as e:
# adjust position in '... at index <idx>' from fmt1 to fmt # adjust position in '... at index <idx>' from fmt1 to fmt
if len(e.args) == 1: if len(e.args) == 1:
...@@ -1701,7 +1709,8 @@ cdef object _buffer_py2(object obj): ...@@ -1701,7 +1709,8 @@ cdef object _buffer_py2(object obj):
# #
# buf must expose buffer interface. # buf must expose buffer interface.
# encoding/errors can be None meaning to use default utf-8/strict. # encoding/errors can be None meaning to use default utf-8/strict.
cdef unicode _buffer_decode(buf, encoding, errors): #cdef xunicode _buffer_decode(buf, encoding, errors):
cdef _buffer_decode(buf, encoding, errors):
if encoding is None: encoding = 'utf-8' # NOTE always UTF-8, not sys.getdefaultencoding if encoding is None: encoding = 'utf-8' # NOTE always UTF-8, not sys.getdefaultencoding
if errors is None: errors = 'strict' if errors is None: errors = 'strict'
if _XPyObject_CheckOldBuffer(buf): if _XPyObject_CheckOldBuffer(buf):
...@@ -1845,9 +1854,9 @@ def _utf8_decode_surrogateescape(const uint8_t[::1] s): # -> unicode ...@@ -1845,9 +1854,9 @@ def _utf8_decode_surrogateescape(const uint8_t[::1] s): # -> unicode
# _utf8_encode_surrogateescape mimics s.encode('utf-8', 'surrogateescape') from py3. # _utf8_encode_surrogateescape mimics s.encode('utf-8', 'surrogateescape') from py3.
def _utf8_encode_surrogateescape(s): # -> bytes def _utf8_encode_surrogateescape(s): # -> bytes
assert isinstance(s, unicode) assert isinstance(s, xunicode)
if PY_MAJOR_VERSION >= 3: if PY_MAJOR_VERSION >= 3:
return unicode.encode(s, 'UTF-8', 'surrogateescape') return xunicode.encode(s, 'UTF-8', 'surrogateescape')
# py2 does not have surrogateescape error handler, and even if we # py2 does not have surrogateescape error handler, and even if we
# provide one, builtin unicode.encode() does not treat # provide one, builtin unicode.encode() does not treat
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment