Commit 0985c583 authored by Kirill Smelkov

golang_str: bstr/ustr .format() support

Similarly to %-formatting, let's add support for .format(). This is
easier to do because we can leverage string.Formatter and hook into the
process by proper subclassing. We do not need to implement parsing and
only need to customize handling of the 's' and 'r' conversions.

For testing we mostly reuse existing tests for %-formatting by amending
them a bit to exercise both %-formatting and format-formatting at the
same time: each %-format specification is converted into the corresponding
{}-format specification, and the formatting result for the latter is
verified to be as expected.

Some explicit tests for {}-style .format() are also added.
parent 390fd810
...@@ -56,6 +56,7 @@ cdef extern from "Python.h": ...@@ -56,6 +56,7 @@ cdef extern from "Python.h":
from libc.stdint cimport uint8_t from libc.stdint cimport uint8_t
pystrconv = None # = golang.strconv imported at runtime (see __init__.py) pystrconv = None # = golang.strconv imported at runtime (see __init__.py)
import string as pystring
import types as pytypes import types as pytypes
import functools as pyfunctools import functools as pyfunctools
import re as pyre import re as pyre
...@@ -391,6 +392,20 @@ class pybstr(bytes): ...@@ -391,6 +392,20 @@ class pybstr(bytes):
a = _pybu_rcoerce(a) a = _pybu_rcoerce(a)
return a.__mod__(b) return a.__mod__(b)
# format
def format(self, *args, **kwargs):
    # formatting itself is done on the ustr view of self;
    # the formatted text is then converted back to bstr.
    formatted = pyu(self).format(*args, **kwargs)
    return pyb(formatted)
def format_map(self, mapping):
    # same as format, but fields are looked up in mapping:
    # delegate to the ustr view of self and convert the result back to bstr.
    formatted = pyu(self).format_map(mapping)
    return pyb(formatted)
def __format__(self, format_spec):
    # NOTE the result is intentionally left as ustr and is not converted to
    #      bstr, because otherwise python complains with
    #      "TypeError: __format__ must return a str, not pybstr".
    #      Returning ustr even for format(bstr, ...) is ok: in practice the
    #      format builtin is never used by programs - only s.format() is -
    #      so __format__ ends up being invoked only internally.
    #
    # NOTE delegating to ustr.__format__ is ok because the only format code
    #      that bstr/ustr/unicode __format__ supports is 's', not e.g. 'r'.
    as_ustr = pyu(self)
    return as_ustr.__format__(format_spec)
# all other string methods # all other string methods
...@@ -646,6 +661,17 @@ class pyustr(unicode): ...@@ -646,6 +661,17 @@ class pyustr(unicode):
a = _pybu_rcoerce(a) a = _pybu_rcoerce(a)
return a.__mod__(b) return a.__mod__(b)
# format
def format(self, *args, **kwargs):
    # positional and keyword arguments are handed to _bvformat as-is;
    # whatever it returns is converted to ustr.
    formatted = _bvformat(self, args, kwargs)
    return pyu(formatted)
def format_map(self, mapping):
    # no positional arguments here: mapping is passed to _bvformat in place
    # of keyword arguments and the result is converted to ustr.
    formatted = _bvformat(self, (), mapping)
    return pyu(formatted)
def __format__(self, format_spec):
    # NOTE unicode.__format__ is used on purpose instead of e.g.
    #      `_bvformat(_pyu_coerce(format_spec), (self,))`: the only format
    #      code that string.__format__ should support is 's', not e.g. 'r'.
    formatted = unicode.__format__(self, format_spec)
    return pyu(formatted)
# all other string methods # all other string methods
...@@ -1290,7 +1316,7 @@ cdef class _UnboundMethod(object): # they removed unbound methods on py3 ...@@ -1290,7 +1316,7 @@ cdef class _UnboundMethod(object): # they removed unbound methods on py3
# '%r' % [b'\xce\xb2'] -> "[b'β']" # '%r' % [b'\xce\xb2'] -> "[b'β']"
# #
# #
# For "2" we implement %-format parsing ourselves. test_strings_mod # For "2" we implement %-format parsing ourselves. test_strings_mod_and_format
# has good coverage for this phase to make sure we get it right and behaving # has good coverage for this phase to make sure we get it right and behaving
# exactly the same way as standard Python does. # exactly the same way as standard Python does.
# #
...@@ -1533,6 +1559,76 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr ...@@ -1533,6 +1559,76 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr
return pybstr(out) return pybstr(out)
# ---- .format formatting ----
# Handling .format is easier and similar to %-Formatting: we detect fields to
# format as strings via using custom string.Formatter (see _BFormatter), and
# further treat objects to stringify similarly to how %-formatting does for %s and %r.
#
# We do not need to implement format parsing ourselves, because
# string.Formatter provides it.
# _bvformat implements .format for pybstr/pyustr.
cdef _bvformat(fmt, args, kw):
    # a fresh _BFormatter is created for every call
    formatter = _BFormatter()
    return formatter.vformat(fmt, args, kw)
class _BFormatter(pystring.Formatter):
    # _BFormatter is string.Formatter with field formatting and the 's'/'r'
    # conversions hooked to go through bstr/ustr stringification.

    def format_field(self, v, fmtspec):
        # {} on bytes/bytearray  ->  the value is treated as bytestring
        if type(v) in (bytes, bytearray):
            v = pyb(v)

        # if the object contains bytes inside, e.g. as in [b'β'], those
        # internal bytes are treated as bytestrings too while v is formatted.
        _bstringify_enter()
        try:
            formatted = super(_BFormatter, self).format_field(v, fmtspec)
        finally:
            _bstringify_leave()

        if PY_MAJOR_VERSION >= 3:
            return formatted

        # py2 Formatter._vformat does ''.join(result)
        # -> everything in result has to be unicode to avoid UnicodeDecodeError
        return pyu(formatted)

    def convert_field(self, v, conv):
        # string.Formatter does str(v) for 's'. we don't want that:
        #   py3: stringify, and especially treat bytes as bytestring
        #   py2: stringify, avoiding e.g. UnicodeEncodeError for str(unicode)
        if conv == 's':
            return pyb(_bstringify(v))

        # for bytes {!r} produces ASCII-only, but we want unicode-like !r
        # for e.g. b'β'  -> handle it ourselves
        if conv == 'r':
            return pyb(_bstringify_repr(v))

        # every other conversion is left to string.Formatter
        return super(_BFormatter, self).convert_field(v, conv)

    # on py2 string.Formatter does not handle field autonumbering
    # -> do it ourselves
    if PY_MAJOR_VERSION < 3:
        _autoidx   = 0      # index that the next '{}' field will receive
        _had_digit = False  # whether an explicitly numbered field was seen

        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                # explicitly numbered field, e.g. '{0}'
                self._had_digit = True
                if self._autoidx != 0:
                    raise ValueError("mixing explicit and auto numbered fields is forbidden")

            elif field_name == '':
                # '{}' -> assign the next automatic index
                if self._had_digit:
                    raise ValueError("mixing explicit and auto numbered fields is forbidden")
                field_name = str(self._autoidx)
                self._autoidx += 1

            return super(_BFormatter, self).get_field(field_name, args, kwargs)
# ---- misc ---- # ---- misc ----
# _strhas returns whether unicode string type has specified method. # _strhas returns whether unicode string type has specified method.
......
...@@ -31,7 +31,7 @@ import sys ...@@ -31,7 +31,7 @@ import sys
import six import six
from six import text_type as unicode, unichr from six import text_type as unicode, unichr
from six.moves import range as xrange from six.moves import range as xrange
import pickle, copy, types import re, pickle, copy, types
import array, collections import array, collections
...@@ -880,8 +880,8 @@ def test_strings_ops2_eq_any(tx): ...@@ -880,8 +880,8 @@ def test_strings_ops2_eq_any(tx):
_(l) _(l)
# verify logic in `bstr % ...` . # verify logic in `bstr % ...` and `bstr.format(...)` .
def test_strings_mod(): def test_strings_mod_and_format():
# verify_fmt_all_types verifies f(fmt, args) for all combinations of # verify_fmt_all_types verifies f(fmt, args) for all combinations of
# #
# · fmt being unicode, bstr, ustr # · fmt being unicode, bstr, ustr
...@@ -1049,13 +1049,56 @@ def test_strings_mod(): ...@@ -1049,13 +1049,56 @@ def test_strings_mod():
_('*str %(x)%', {'x':1}, ValueError("unsupported format character '%' (0x25) at index 9")) _('*str %(x)%', {'x':1}, ValueError("unsupported format character '%' (0x25) at index 9"))
# parse checking complete. now verify actual %-formatting # parse checking complete. now verify actual %- and format- formatting
# fmt_percent_to_bracket converts %-style format to .format-style format string.
def fmt_percent_to_bracket(fmt):
    # every %<x> is replaced with its {}-style counterpart.
    # the table is deliberately dumb and explicit, so that there is no
    # chance to get the conversion logic wrong: a specifier that is not
    # listed here fails with KeyError instead of being guessed.
    bracket_for = {
        '%s':       '{!s}',
        '%r':       '{!r}',
        '%(x)s':    '{x!s}',
        '%(y)s':    '{y!s}',
        '%(z)s':    '{z!s}',
    }

    def to_bracket(m):
        return bracket_for[m.group()]

    converted = re.sub('%[^ ]*[a-z]', to_bracket, fmt)
    assert '%' not in converted
    return converted
# xformat calls fmt.format with *args or **args appropriately.
def xformat(fmt, args):
    mapping_types = (dict, six.moves.UserDict)

    if not isinstance(args, mapping_types):
        if isinstance(args, tuple):
            return fmt.format(*args)
        return fmt.format(args)     # it was e.g. `'%s' % 123`

    # mapping: format via **kwargs and cross-check against format_map
    via_kwargs = fmt.format(**args)
    py2_unicode = (six.PY2 and type(fmt) is unicode)
    if not py2_unicode:             # py2: no unicode.format_map()
        via_map = fmt.format_map(args)
        assert via_kwargs == via_map
    return via_kwargs
# _ verifies `fmt % args` # _ verifies `fmt % args` and `fmt'.format(args)`
# if fmt has no '%' only .format(args) is verified. # if fmt has no '%' only .format(args) is verified.
def _(fmt, args, *okv):
    if '%' not in fmt:
        # fmt is already in {}-style: only .format is verified
        verify_fmt_all_types(xformat, fmt, args, *okv)
        return

    verify_fmt_all_types(lambda fmt, args: fmt % args,
                         fmt, args, *okv)

    # compute fmt' for .format verification
    bracket_fmt = fmt_percent_to_bracket(fmt)

    # and assert that .format result is the same as for %
    # compare to b() formatting because else on py2 we hit unicode % issues
    # we, anyway, just verified b() % above.
    if not okv:
        okv = [b(fmt) % args]

    verify_fmt_all_types(xformat, bracket_fmt, args, *okv)
_("*str a %s z", 123) # NOTE *str to force str -> bstr/ustr even for ASCII string _("*str a %s z", 123) # NOTE *str to force str -> bstr/ustr even for ASCII string
_("*str a %s z", '*str \'"\x7f') _("*str a %s z", '*str \'"\x7f')
...@@ -1229,6 +1272,17 @@ def test_strings_mod(): ...@@ -1229,6 +1272,17 @@ def test_strings_mod():
assert br == ok assert br == ok
assert ur == ok assert ur == ok
# verify b().format(args) and u().format(args)
fmt_ = fmt_percent_to_bracket(fmt)
bfmt_ = b(fmt_)
ufmt_ = u(fmt_)
br_ = xformat(bfmt_, args) #;print(repr(bfmt), " .format ", repr(args), " -> ", repr(br))
ur_ = xformat(ufmt_, args) #;print(repr(ufmt), " .format ", repr(args), " -> ", repr(ur))
assert type(br_) is bstr
assert type(ur_) is ustr
assert br_ == ok
assert ur_ == ok
M("α %s π", U ( u'май') , "α май π") M("α %s π", U ( u'май') , "α май π")
M("α %s π", (U ( u'май'),) , "α май π") M("α %s π", (U ( u'май'),) , "α май π")
M("α %s π", [U ( u'май')] , "α ['май'] π") M("α %s π", [U ( u'май')] , "α ['май'] π")
...@@ -1305,6 +1359,68 @@ def test_strings_mod(): ...@@ -1305,6 +1359,68 @@ def test_strings_mod():
M("α %r", [b('β')] , "α [b('β')]") M("α %r", [b('β')] , "α [b('β')]")
M("α %r", [u('β')] , "α [u('β')]") M("α %r", [u('β')] , "α [u('β')]")
# some explicit verifications for .format()
_("*str hello {}", ("world",))
_("*str hello {}", (["world"],))
_("*str hello {}", ("мир",))
_("*str hello {}", (["мир"],) , "*str hello ['мир']")
_("привет {}", ("мир",))
_("привет {}", (["мир"],) , "привет ['мир']")
_("привет {0}, {1}", ("Петя", "Вася"))
_("привет {name}", {'name': "Ваня"})
_("привет {name}", {"name": "Тигра"} , "привет Тигра")
_("привет {name!s}", {"name": "Винни"} , "привет Винни")
_("привет {name:>10}", {"name": "Пух"} , "привет Пух")
_("привет {!s}", ("мир",))
_("привет {!s}", (["мир"],) , "привет ['мир']")
_("привет {:>10}", ("мир",))
_("привет {:>{}} {}", ("мир", 10, "α"))
_("привет {:02x}", (23,))
# verify __format__ + format() builtin
def test_strings__format__():
    # str.format with plain str, bstr and ustr arguments
    assert "привет {}".format("мир")    == "привет мир"
    assert "привет {}".format(b("мир")) == "привет мир"
    assert "привет {}".format(u("мир")) == "привет мир"

    # format builtin on plain unicode - the reference behaviour.
    # NOTE ">5" right-aligns the 3-character string in a field of width 5,
    #      so the expected result has exactly two leading spaces.
    assert format(u"мир")       == u"мир"
    assert format(u"мир", "")   == u"мир"
    assert format(u"мир", "s")  == u"мир"
    assert format(u"мир", ">5") == u"  мир"

    # format builtin on bstr
    fb  = format(b("мир"))
    fb_ = format(b("мир"), "")
    fbs = format(b("мир"), "s")
    fb5 = format(b("мир"), ">5")
    assert type(fb)  is ustr    # NOTE ustr, not bstr due to b.__format__ returning u
    assert type(fb_) is ustr
    assert type(fbs) is ustr
    assert type(fb5) is ustr
    assert fb  == "мир"
    assert fb_ == "мир"
    assert fbs == "мир"
    assert fb5 == "  мир"

    # format builtin on ustr
    fu  = format(u("мир"))
    fu_ = format(u("мир"), "")
    fus = format(u("мир"), "s")
    fu5 = format(u("мир"), ">5")
    assert type(fu)  is ustr
    assert type(fu_) is ustr
    assert type(fus) is ustr
    assert type(fu5) is ustr
    assert fu  == "мир"
    assert fu_ == "мир"
    assert fus == "мир"
    assert fu5 == "  мир"

    # string.__format__ accepts only '' and 's' format codes
    for fmt_spec in "abcdefghijklmnopqrstuvwxyz":
        if fmt_spec == 's':
            continue

        with raises(ValueError):  format(  u"мир",  fmt_spec)
        with raises(ValueError):  format(b("мир"),  fmt_spec)
        with raises(ValueError):  format(u("мир"),  fmt_spec)
# verify print for bstr/ustr. # verify print for bstr/ustr.
def test_strings_print(): def test_strings_print():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment