Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
2a9d8d45
Commit
2a9d8d45
authored
Jan 06, 2013
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implement \N{...} Unicode escapes for literals
parent
159a3b9a
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
33 additions
and
15 deletions
+33
-15
CHANGES.rst
CHANGES.rst
+2
-0
Cython/Compiler/Lexicon.py
Cython/Compiler/Lexicon.py
+1
-0
Cython/Compiler/Parsing.py
Cython/Compiler/Parsing.py
+16
-11
tests/run/strliterals.pyx
tests/run/strliterals.pyx
+8
-4
tests/run/unicodeliterals.pyx
tests/run/unicodeliterals.pyx
+3
-0
tests/run/unicodeliteralslatin1.pyx
tests/run/unicodeliteralslatin1.pyx
+3
-0
No files found.
CHANGES.rst
View file @
2a9d8d45
...
...
@@ -8,6 +8,8 @@ Cython Changelog
Features added
--------------
* Named Unicode escapes ("\N{...}") are supported.
* Python functions/classes provide the special attribute "__qualname__"
as defined by PEP 3155.
...
...
Cython/Compiler/Lexicon.py
View file @
2a9d8d45
...
...
@@ -66,6 +66,7 @@ def make_lexicon():
two_hex
=
hexdigit
+
hexdigit
four_hex
=
two_hex
+
two_hex
escapeseq
=
Str
(
"
\
\
"
)
+
(
two_oct
|
three_oct
|
Str
(
'N{'
)
+
Rep
(
AnyBut
(
'}'
))
+
Str
(
'}'
)
|
Str
(
'u'
)
+
four_hex
|
Str
(
'x'
)
+
two_hex
|
Str
(
'U'
)
+
four_hex
+
four_hex
|
AnyChar
)
...
...
Cython/Compiler/Parsing.py
View file @
2a9d8d45
...
...
@@ -8,6 +8,7 @@ import cython
cython
.
declare
(
Nodes
=
object
,
ExprNodes
=
object
,
EncodedString
=
object
)
import
re
import
unicodedata
from
Cython.Compiler.Scanning
import
PyrexScanner
,
FileSourceDescriptor
import
Nodes
...
...
@@ -803,23 +804,27 @@ def p_string_literal(s, kind_override=None):
StringEncoding
.
char_from_escape_sequence
(
systr
))
elif
c
==
u'
\
n
'
:
pass
elif
c
==
u'x'
:
elif
c
==
u'x'
:
# \xXX
if
len
(
systr
)
==
4
:
chars
.
append_charval
(
int
(
systr
[
2
:],
16
)
)
else
:
s
.
error
(
"Invalid hex escape '%s'"
%
systr
)
elif
c
in
u'Uu'
:
if
kind
in
(
'u'
,
''
):
if
len
(
systr
)
in
(
6
,
10
):
chrval
=
int
(
systr
[
2
:],
16
)
if
chrval
>
1114111
:
# sys.maxunicode:
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
else
:
elif
c
in
u'NUu'
and
kind
in
(
'u'
,
''
):
# \uxxxx, \Uxxxxxxxx, \N{...}
chrval
=
-
1
if
c
==
u'N'
:
try
:
chrval
=
ord
(
unicodedata
.
lookup
(
systr
[
3
:
-
1
]))
except
KeyError
:
s
.
error
(
"Unknown Unicode character name %r"
%
systr
[
3
:
-
1
])
elif
len
(
systr
)
in
(
6
,
10
):
chrval
=
int
(
systr
[
2
:],
16
)
if
chrval
>
1114111
:
# sys.maxunicode:
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
chrval
=
-
1
else
:
# unicode escapes in byte strings are not unescaped
chrval
=
None
chars
.
append_uescape
(
chrval
,
systr
)
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
if
chrval
>=
0
:
chars
.
append_uescape
(
chrval
,
systr
)
else
:
chars
.
append
(
u'
\
\
'
+
systr
[
1
:])
if
is_python3_source
and
not
has_non_ASCII_literal_characters
\
...
...
tests/run/strliterals.pyx
View file @
2a9d8d45
...
...
@@ -132,9 +132,9 @@ __doc__ = ur"""
>>> len(bytes_uescape)
28
>>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) ==
3
or
... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) ==
4
or
... sys.version_info[0] < 3 and len(str_uescape) ==
17
or
>>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) ==
4
or
... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) ==
5
or
... sys.version_info[0] < 3 and len(str_uescape) ==
28
or
... len(str_uescape))
True
>>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
...
...
@@ -143,6 +143,10 @@ __doc__ = ur"""
True
>>> print(str_uescape[-1])
B
>>> (sys.version_info[0] >= 3 and ord(str_uescape[-2]) == 0x2603 or
... sys.version_info[0] < 3 and str_uescape[-12:-1] == b'\\N{SNOWMAN}' or
... sys.version_info[0] >= 3 and ord(str_uescape[-2]) or str_uescape[-12:-1])
True
>>> newlines == "Aaa\n"
True
...
...
@@ -185,7 +189,7 @@ bresc = br'\12\'\"\\'
uresc
=
ur'\12\'\"\\'
bytes_uescape
=
b'
\
u1234
\
U12345678
\
u
\
u1
\
u
12
\
uX
'
str_uescape = '
\
u0063
\
U00012345
\
x42
'
str_uescape = '
\
u0063
\
U00012345
\
N
{
SNOWMAN
}
\
x42
'
newlines = "Aaa
\
n
"
...
...
tests/run/unicodeliterals.pyx
View file @
2a9d8d45
...
...
@@ -75,6 +75,8 @@ __doc__ = br"""
True
>>> h == u'
\
\
ud800' # unescaped by Python (required by doctest)
True
>>> k == u'
\
\
N{SNOWMAN}' == u'
\
\
u2603'
True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
...
...
@@ -107,6 +109,7 @@ e = u'\x03\x67\xf8\uf8d2Søk ik'
f
=
u'
\
xf8
'
g
=
u'
\
udc00
'
# lone trail surrogate
h
=
u'
\
ud800
'
# lone lead surrogate
k
=
u'
\
N{SNOWMAN}
'
add
=
u'Søk ik'
+
u'üÖä'
+
u'abc'
null
=
u'
\
x00
'
...
...
tests/run/unicodeliteralslatin1.pyx
View file @
2a9d8d45
...
...
@@ -55,6 +55,8 @@ __doc__ = br"""
True
>>> f == u'
\
\
xf8' # unescaped by Python
True
>>> k == u'ä' == u'
\
\
N{LATIN SMALL LETTER A WITH DIAERESIS}'
True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
...
...
@@ -75,6 +77,7 @@ c = u'S
d
=
u''
e
=
u'
\
x03
\
x67
\
xf8
\
uf8d2
Søk ik'
f
=
u'
\
xf8
'
k
=
u'
\
N{LATIN SMALL LETTER A WITH DIAERESIS}
'
add
=
u'Søk ik'
+
u'üÖä'
+
u'abc'
null
=
u'
\
x00
'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment