Commit 39389bdc authored by Stefan Behnel

escape C digraphs, trigraphs and other special characters in strings

parent f40f8078
......@@ -99,8 +99,26 @@ class EncodedString(unicode):
# return unicode.__eq__(self, other) and \
# getattr(other, 'encoding', '') == self.encoding
def _to_oct_sequence(s):
    """Return *s* with every character spelled as an octal escape.

    Each character is rendered as a backslash followed by its ordinal in
    octal, zero-padded to at least three digits, and the pieces are
    concatenated in order.
    """
    return ''.join('\\%03o' % code for code in map(ord, s))
# Character sequences that get replaced by octal escapes when escaping a
# byte string: the NUL character, the trigraph introducer '??' and the C
# digraphs.  '%:' is listed only once; replacing it also takes care of the
# four-character digraph '%:%:'.
_c_special = ('\0', '??', '<:', ':>', '<%', '%>', '%:')

# (special sequence, octal-escaped replacement) pairs.  Materialised as a
# list so the table can be iterated on every call: a bare zip() is a
# one-shot iterator on Python 3 and would be empty after its first use.
_c_special_replacements = list(
    zip(_c_special, map(_to_oct_sequence, _c_special)))
def _build_special_test():
    """Build the search function that detects strings needing escaping.

    Returns the bound ``search`` method of a compiled pattern that matches
    any entry of ``_c_special`` as well as newline, carriage return and tab.
    The result is a match object when such a sequence occurs in the
    searched string and ``None`` otherwise.
    """
    specials = _c_special + ('\n', '\r', '\t')
    # re.escape() quotes each sequence literally.  Wrapping every character
    # in its own '[...]' class would break for ']', '^' and '\\'.
    alternatives = [re.escape(special) for special in specials]
    return re.compile('(' + '|'.join(alternatives) + ')').search


_has_specials = _build_special_test()
def escape_byte_string(s):
s = s.replace('\0', r'\000').replace('\x0A', r'\012').replace('\x0C', r'\014')
if _has_specials(s):
s = s.replace('\n', r'\n').replace('\r', r'\r').replace('\t', r'\t')
for special, replacement in _c_special_replacements:
s = s.replace(special, replacement)
try:
s.decode("ASCII")
return s
......
......@@ -6,13 +6,23 @@ __doc__ = u"""
... b'\\x0A57',
... b'abc\\x12def',
... u'\\u1234',
... u'\\U00041234',
... u'\\U00001234',
... b'\\u1234',
... b'\\U00041234',
... b'\\U00001234',
... b'\\n\\r\\t',
... b':>',
... b'??>',
... b'\\0\\0\\0',
... ]
>>> for i, (py_string, c_string) in enumerate(zip(py_strings, c_strings)):
>>> for i, (py_string, (c_string, length)) in enumerate(zip(py_strings, c_strings)):
... assert py_string == c_string, "%d: %r != %r" % (i, py_string, c_string)
... assert len(py_string) == length, (
... "%d: wrong length of %r, got %d, expected %d" % (
... i, py_string, len(py_string), length))
... assert len(c_string) == length, (
... "%d: wrong length of %r, got %d, expected %d" % (
... i, c_string, len(c_string), length))
"""
......@@ -23,12 +33,16 @@ else:
__doc__ = __doc__.replace(u" u'", u" '")
# Each entry pairs a string literal with the length it is expected to have.
# Bare (un-paired) entries are not allowed here: the consumer unpacks every
# item as ``(c_string, length)``.
c_strings = [
    (b'\x1234', 3),
    (b'\x0A12\x0C34', 6),
    (b'\x0A57', 3),
    (b'abc\x12def', 7),
    (u'\u1234', 1),
    (u'\U00001234', 1),
    (b'\u1234', 6),
    (b'\U00001234', 10),
    (b'\n\r\t', 3),
    (b':>', 2),
    (b'??>', 3),
    (b'\0\0\0', 3),
]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment