Commit ed6b7895 authored by Kirill Smelkov's avatar Kirill Smelkov

strconv += unquote(), unquote_next()

These are functions to decode quoted strings that were produced by
strconv.quote().
parent f09701b0
......@@ -135,7 +135,7 @@ without escaping printable UTF-8 characters::
and also any other type that can be converted to `str`.
Package `golang.strconv` provides direct access to conversion routines, for
example `strconv.quote`.
example `strconv.quote` and `strconv.unquote`.
Benchmarking and testing
......
......@@ -102,6 +102,93 @@ def _quote(s):
return b'"' + b''.join(outv) + b'"'
# unquote is the inverse of quote: it takes a quoted unicode|byte string and
# returns what was inside the quotes with escapes decoded.
#
# The whole input must be consumed - anything left after the closing " is
# treated as an error.
#
# ValueError is raised if there are quoting syntax errors.
def unquote(s):
    unquoted, rest = unquote_next(s)
    if rest:
        raise ValueError('non-empty tail after closing "')
    return unquoted
# unquote_next decodes the quoted unicode|byte string that s starts with and
# hands back whatever follows it.
#
# it returns -> (unquoted(s), tail-after-")
#
# Decoding itself is done on bytes; if s was given as unicode, both returned
# parts are converted back from UTF-8 so that the result kind matches the input.
#
# ValueError is raised if there are quoting syntax errors.
def unquote_next(s):
    raw, wasunicode = _bstr(s)
    unquoted, rest = _unquote_next(raw)
    if not wasunicode:
        return unquoted, rest
    return unquoted.decode('UTF-8'), rest.decode('UTF-8')
# _unquote_next is the bytes-only worker behind unquote_next.
#
# s must be bytes. It returns -> (unquoted, tail-after-"), both as bytes.
#
# ValueError is raised if there are quoting syntax errors: missing opening or
# closing ", input ending right after \ or inside \xHH, unknown escape, or \x
# that is not followed by two hexadecimal digits.
def _unquote_next(s):
    assert isinstance(s, bytes)

    # compare via 1-byte slice, not s[0], so that both sides are bytes on py2 and py3
    if len(s) == 0 or s[0:0+1] != b'"':
        raise ValueError('no starting "')

    # \<c> escapes that stand for exactly one fixed byte
    simple_escapes = {
        b'\\': b'\\',
        b'"':  b'"',
        b't':  b'\t',
        b'n':  b'\n',
        b'r':  b'\r',
    }

    outv = []
    emit = outv.append

    s = s[1:]   # skip opening "
    while 1:
        r, width = _utf8_decode_rune(s)
        if width == 0:
            # zero width is treated as end of input: closing " was never seen
            raise ValueError('no closing "')

        if r == u'"':
            s = s[1:]
            break

        # regular UTF-8 character: copy its bytes through unchanged
        if r != u'\\':
            emit(s[:width])
            s = s[width:]
            continue

        if len(s) < 2:
            raise ValueError('unexpected EOL after \\')

        c = s[1:1+1]

        # \<c> -> fixed byte    ; c = \ " t n r
        if c in simple_escapes:
            emit(simple_escapes[c])
            s = s[2:]
            continue

        if c == b'x':   # hex   XXX also handle octals?
            if len(s) < 2+2:
                raise ValueError('unexpected EOL after \\x')

            hexdigits = s[2:2+2]
            try:
                b = codecs.decode(hexdigits, 'hex')
            except (TypeError, ValueError):
                # the hex codec reports non-hex digits as TypeError on py2 and
                # as binascii.Error (a ValueError) on py3; turn both into the
                # same quoting syntax error that the other branches raise.
                shown = ''.join(chr(ord(hexdigits[i:i+1])) for i in range(len(hexdigits)))
                raise ValueError('invalid escape \\x%s' % shown)

            emit(b)
            s = s[2+2:]
            continue

        raise ValueError('invalid escape \\%s' % chr(ord(c[0:0+1])))

    return b''.join(outv), s
_printable_cat0 = frozenset(['L', 'N', 'P', 'S']) # letters, numbers, punctuation, symbols
_rune_error = u'\uFFFD' # unicode replacement character
......
......@@ -18,11 +18,12 @@
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from golang.strconv import quote
from golang.strconv import quote, unquote, unquote_next
from golang.gcompat import qq
from six import int2byte as bchr, PY3
from six.moves import range as xrange
from pytest import raises
def byterange(start, stop):
b = b""
......@@ -62,10 +63,16 @@ def test_quote():
for tin, tquoted in testv:
# quote(in) == quoted
# in = unquote(quoted)
q = b'"' if isinstance(tquoted, bytes) else '"'
tail = b'123' if isinstance(tquoted, bytes) else '123'
tquoted = q + tquoted + q # add lead/trail "
assert quote(tin) == tquoted
assert unquote(tquoted) == tin
assert unquote_next(tquoted) == (tin, type(tin)())
assert unquote_next(tquoted + tail) == (tin, tail)
raises(ValueError, 'unquote(tquoted + tail)')
# qq always gives str
assert qq(tin) == asstr(tquoted)
......@@ -78,12 +85,35 @@ def test_quote():
# some inputs are not valid UTF-8
continue
tquoted = tquoted.decode('utf-8')
tail = tail.decode('utf-8')
else:
# tin was unicode
tin = tin.encode('utf-8')
tquoted = tquoted.encode('utf-8')
tail = tail.encode('utf-8')
assert quote(tin) == tquoted
assert unquote(tquoted) == tin
assert unquote_next(tquoted) == (tin, type(tin)())
assert unquote_next(tquoted + tail) == (tin, tail)
raises(ValueError, 'unquote(tquoted + tail)')
# qq always gives str
assert qq(tin) == asstr(tquoted)
# test_unquote_bad verifies that unquote rejects malformed input with
# ValueError carrying the exact expected message.
def test_unquote_bad():
    bad_cases = (
        # in                error
        ('x"zzz"',          'no starting "'),
        ('"zzz',            'no closing "'),
        ('"\\',             'unexpected EOL after \\'),
        ('"\\x',            'unexpected EOL after \\x'),
        ('"\\x0',           'unexpected EOL after \\x'),
        ('"\\z"',           'invalid escape \\z'),
    )

    for quoted, errmsg in bad_cases:
        with raises(ValueError) as excinfo:
            unquote(quoted)
        # checked after the with block - code following the raising call inside it would never run
        assert excinfo.value.args == (errmsg,)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment