Commit edc7aaab authored by Kirill Smelkov's avatar Kirill Smelkov

golang: Teach qq to be usable with both bytes and str format whatever type qq argument is

qq is used to quote strings or byte-strings. The following example
illustrates the problem we are currently hitting in zodbtools with
Python3:

    >>> "hello %s" % qq("мир")
    'hello "мир"'

    >>> b"hello %s" % qq("мир")
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'

    >>> "hello %s" % qq(b("мир"))
    'hello "мир"'

    >>> b"hello %s" % qq(b("мир"))
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'

i.e. one way or another, whenever the type of the format string and the type
of what qq returns do not match, a TypeError is raised.

We want qq(obj) to be usable with both string and bytestring formats.

For that let's teach qq to return special str- and bytes-derived types that
know how to automatically convert str->bytes and bytes->str via b/u
correspondingly. This way formatting works for any combination of format type
and qq argument type, and the whole result has the same type as the format.

For now we teach only qq to use the new types and do not yet make b and u
return _str and _unicode in general. However we might do so in the future
after incrementally gaining a bit more experience.

/proposed-for-review-on: !1
parent 85a1765d
......@@ -828,15 +828,56 @@ def pyqq(obj):
qobj = pystrconv.quote(obj)
# `printf('%s', qq(obj))` should work. For this make sure qobj is always
# of str type (unicode on py3, bytes on py2).
# a-la str type (unicode on py3, bytes on py2), that can be transparently
# converted to unicode or bytes as needed.
if PY_MAJOR_VERSION >= 3:
qobj = pyu(qobj)
qobj = _pyunicode(pyu(qobj))
else:
qobj = pyb(qobj)
qobj = _pystr(pyb(qobj))
return qobj
# XXX cannot `cdef class`: github.com/cython/cython/issues/711
class _pystr(bytes):
"""_str is like bytes but can be automatically converted to Python unicode
string via UTF-8 decoding.
The decoding never fails nor loses information - see u for details.
"""
# do not allow setting arbitrary attributes on instances.
# won't be needed after the switch to `cdef class`
__slots__ = ()
# __bytes__ - no need: instances already are bytes.
# __unicode__ converts to unicode via pyu.
def __unicode__(self): return pyu(self)
# __str__ gives the native str flavour:
# py3 - convert to unicode via pyu; py2 - self is already bytes, return as is.
def __str__(self):
if PY_MAJOR_VERSION >= 3:
return pyu(self)
else:
return self
cdef class _pyunicode(unicode):
"""_unicode is like unicode(py2)|str(py3) but can be automatically converted
to bytes via UTF-8 encoding.
The encoding always succeeds - see b for details.
"""
# __bytes__ converts to bytes via pyb; bytes %-formatting accepts objects
# that implement __bytes__, which is what makes `b"..." % qq(x)` work.
def __bytes__(self): return pyb(self)
# __unicode__ - no need: instances already are unicode.
# __str__ gives the native str flavour:
# py3 - self is already unicode, return as is; py2 - convert to bytes via pyb.
def __str__(self):
if PY_MAJOR_VERSION >= 3:
return self
else:
return pyb(self)
# ---- error ----
from golang cimport errors
......
......@@ -1660,6 +1660,18 @@ def test_qq():
assert isinstance(qq(b('мир')), str) # qq(b) -> str (bytes·py2, unicode·py3)
assert isinstance(qq( u'мир'), str) # qq(u) -> str (bytes·py2, unicode·py3)
# however what qq returns can be mixed with both unicode and bytes
assert b'hello %s !' % qq(b('мир')) == b('hello "мир" !') # b % qq(b)
assert b'hello %s !' % qq(u('мир')) == b('hello "мир" !') # b % qq(u) -> b
assert u'hello %s !' % qq(u('мир')) == u('hello "мир" !') # u % qq(u)
assert u'hello %s !' % qq(b('мир')) == u'hello "мир" !' # u % qq(b) -> u
# custom attributes cannot be injected to what qq returns
x = qq('мир')
if not ('PyPy' in sys.version): # https://foss.heptapod.net/pypy/pypy/issues/2763
with raises(AttributeError):
x.hello = 1
# ---- misc ----
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment