Commit 71a59940 authored by Stefan Behnel

support for long unicode escapes ('\U...')

fixed unicode escape handling in byte strings
unescape \xXY in string literals, because C lets a \x escape run on into any trailing hex digits - output string escaping will do the right thing
parent fcdc2a72
......@@ -64,7 +64,8 @@ def make_lexicon():
two_hex = hexdigit + hexdigit
four_hex = two_hex + two_hex
escapeseq = Str("\\") + (two_oct | three_oct | two_hex |
Str('u') + four_hex | Str('x') + two_hex | AnyChar)
Str('u') + four_hex | Str('x') + two_hex |
Str('U') + four_hex + four_hex | AnyChar)
deco = Str("@")
bra = Any("([{")
......
......@@ -2,7 +2,9 @@
# Pyrex Parser
#
import os, re
import os
import re
import sys
from types import ListType, TupleType
from Scanning import PyrexScanner, FileSourceDescriptor
import Nodes
......@@ -604,18 +606,17 @@ def p_string_literal(s):
chars.append(systr)
elif c == '\n':
pass
elif c in 'ux':
if kind == 'u':
try:
chars.append(
systr.encode("ASCII").decode('unicode_escape'))
except UnicodeDecodeError:
elif c in 'Uux':
if kind == 'u' or c == 'x':
chrval = int(systr[2:], 16)
if chrval > sys.maxunicode:
s.error("Invalid unicode escape '%s'" % systr,
pos = pos)
elif c == 'x':
chars.append('\\x0' + systr[2:])
strval = unichr(chrval)
else:
chars.append(systr)
# unicode escapes in plain byte strings are not unescaped
strval = systr
chars.append(strval.replace('\\', '\\\\'))
else:
chars.append(r'\\' + systr[1:])
elif sy == 'NEWLINE':
......
# Doctest for string escape handling: it builds ``py_strings`` from a set of
# string literals and asserts that each entry equals the entry at the same
# index in the module-level ``c_strings`` list defined below.
# NOTE(review): presumably this module is compiled (Cython/Pyrex), so that
# ``c_strings`` exercises the compiler's escape handling while the doctest
# literals are evaluated by the Python interpreter -- confirm against the
# test runner.
__doc__ = u"""
>>> py_strings = [
... '\\x1234',
... '\\x0A12\\x0C34',
... '\\x0A57',
... 'abc\\x12def',
... u'\\u1234',
... u'\\U00041234',
... b'\\u1234',
... b'\\U00041234',
... ]
>>> for i, (py_string, c_string) in enumerate(zip(py_strings, c_strings)):
...     assert py_string == c_string, "%d: %r != %r" % (i, py_string, c_string)
"""

import sys

# Adapt the literal prefixes inside the doctest to the running interpreter:
# on Python < 3 the ``b`` prefix is dropped, otherwise the ``u`` prefix is
# dropped.  The leading space in the pattern keeps the replacement limited
# to literal prefixes that directly follow a space.
if sys.version_info[0] < 3:
    __doc__ = __doc__.replace(" b'", " '")
else:
    __doc__ = __doc__.replace(" u'", " '")

# The same literals as ``py_strings`` in the doctest, in the same order; the
# doctest compares the two lists pairwise.
c_strings = [
    '\x1234',
    '\x0A12\x0C34',
    '\x0A57',
    'abc\x12def',
    u'\u1234',
    u'\U00041234',
    b'\u1234',
    b'\U00041234',
]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment