Commit a391c3a3 authored by Stefan Behnel

preprocess byte string literal escaping instead of doing repeated replacements at runtime

parent f989876b
...@@ -165,6 +165,9 @@ char_from_escape_sequence = { ...@@ -165,6 +165,9 @@ char_from_escape_sequence = {
r'\v' : u'\v', r'\v' : u'\v',
}.get }.get
_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
def _to_escape_sequence(s): def _to_escape_sequence(s):
if s in '\n\r\t': if s in '\n\r\t':
return repr(s)[1:-1] return repr(s)[1:-1]
...@@ -176,19 +179,23 @@ def _to_escape_sequence(s): ...@@ -176,19 +179,23 @@ def _to_escape_sequence(s):
# within a character sequence, oct passes much better than hex # within a character sequence, oct passes much better than hex
return ''.join(['\\%03o' % ord(c) for c in s]) return ''.join(['\\%03o' % ord(c) for c in s])
_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
_c_special_replacements = [(orig.encode('ASCII'),
_to_escape_sequence(orig).encode('ASCII'))
for orig in _c_special ]
def _build_specials_test(): def _build_specials_replacer():
subexps = [] subexps = []
replacements = {}
for special in _c_special: for special in _c_special:
regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special]) regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special])
subexps.append(regexp) subexps.append(regexp)
return re.compile('|'.join(subexps).encode('ASCII')).search replacements[special.encode('ASCII')] = _to_escape_sequence(special).encode('ASCII')
sub = re.compile(('(%s)' % '|'.join(subexps)).encode('ASCII')).sub
def replace_specials(m):
return replacements[m.group(1)]
def replace(s):
return sub(replace_specials, s)
return replace
_replace_specials = _build_specials_replacer()
_has_specials = _build_specials_test()
def escape_char(c): def escape_char(c):
if IS_PYTHON3: if IS_PYTHON3:
...@@ -210,10 +217,7 @@ def escape_byte_string(s): ...@@ -210,10 +217,7 @@ def escape_byte_string(s):
encoded as ISO-8859-1, will result in the correct byte sequence encoded as ISO-8859-1, will result in the correct byte sequence
being written. being written.
""" """
if _has_specials(s): s = _replace_specials(s)
for special, replacement in _c_special_replacements:
if special in s:
s = s.replace(special, replacement)
try: try:
return s.decode("ASCII") # trial decoding: plain ASCII => done return s.decode("ASCII") # trial decoding: plain ASCII => done
except UnicodeDecodeError: except UnicodeDecodeError:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment