Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
cython
Commits
2a9d8d45
Commit
2a9d8d45
authored
Jan 06, 2013
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implement \N{...} Unicode escapes for literals
parent
159a3b9a
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
33 additions
and
15 deletions
+33
-15
CHANGES.rst
CHANGES.rst
+2
-0
Cython/Compiler/Lexicon.py
Cython/Compiler/Lexicon.py
+1
-0
Cython/Compiler/Parsing.py
Cython/Compiler/Parsing.py
+16
-11
tests/run/strliterals.pyx
tests/run/strliterals.pyx
+8
-4
tests/run/unicodeliterals.pyx
tests/run/unicodeliterals.pyx
+3
-0
tests/run/unicodeliteralslatin1.pyx
tests/run/unicodeliteralslatin1.pyx
+3
-0
No files found.
CHANGES.rst
View file @
2a9d8d45
...
@@ -8,6 +8,8 @@ Cython Changelog
...
@@ -8,6 +8,8 @@ Cython Changelog
Features added
Features added
--------------
--------------
* Named Unicode escapes ("\N{...}") are supported.
* Python functions/classes provide the special attribute "__qualname__"
* Python functions/classes provide the special attribute "__qualname__"
as defined by PEP 3155.
as defined by PEP 3155.
...
...
Cython/Compiler/Lexicon.py
View file @
2a9d8d45
...
@@ -66,6 +66,7 @@ def make_lexicon():
...
@@ -66,6 +66,7 @@ def make_lexicon():
two_hex
=
hexdigit
+
hexdigit
two_hex
=
hexdigit
+
hexdigit
four_hex
=
two_hex
+
two_hex
four_hex
=
two_hex
+
two_hex
escapeseq
=
Str
(
"
\
\
"
)
+
(
two_oct
|
three_oct
|
escapeseq
=
Str
(
"
\
\
"
)
+
(
two_oct
|
three_oct
|
Str
(
'N{'
)
+
Rep
(
AnyBut
(
'}'
))
+
Str
(
'}'
)
|
Str
(
'u'
)
+
four_hex
|
Str
(
'x'
)
+
two_hex
|
Str
(
'u'
)
+
four_hex
|
Str
(
'x'
)
+
two_hex
|
Str
(
'U'
)
+
four_hex
+
four_hex
|
AnyChar
)
Str
(
'U'
)
+
four_hex
+
four_hex
|
AnyChar
)
...
...
Cython/Compiler/Parsing.py
View file @
2a9d8d45
...
@@ -8,6 +8,7 @@ import cython
...
@@ -8,6 +8,7 @@ import cython
cython
.
declare
(
Nodes
=
object
,
ExprNodes
=
object
,
EncodedString
=
object
)
cython
.
declare
(
Nodes
=
object
,
ExprNodes
=
object
,
EncodedString
=
object
)
import
re
import
re
import
unicodedata
from
Cython.Compiler.Scanning
import
PyrexScanner
,
FileSourceDescriptor
from
Cython.Compiler.Scanning
import
PyrexScanner
,
FileSourceDescriptor
import
Nodes
import
Nodes
...
@@ -803,23 +804,27 @@ def p_string_literal(s, kind_override=None):
...
@@ -803,23 +804,27 @@ def p_string_literal(s, kind_override=None):
StringEncoding
.
char_from_escape_sequence
(
systr
))
StringEncoding
.
char_from_escape_sequence
(
systr
))
elif
c
==
u'
\
n
'
:
elif
c
==
u'
\
n
'
:
pass
pass
elif
c
==
u'x'
:
elif
c
==
u'x'
:
# \xXX
if
len
(
systr
)
==
4
:
if
len
(
systr
)
==
4
:
chars
.
append_charval
(
int
(
systr
[
2
:],
16
)
)
chars
.
append_charval
(
int
(
systr
[
2
:],
16
)
)
else
:
else
:
s
.
error
(
"Invalid hex escape '%s'"
%
systr
)
s
.
error
(
"Invalid hex escape '%s'"
%
systr
)
elif
c
in
u'Uu'
:
elif
c
in
u'NUu'
and
kind
in
(
'u'
,
''
):
# \uxxxx, \Uxxxxxxxx, \N{...}
if
kind
in
(
'u'
,
''
):
chrval
=
-
1
if
len
(
systr
)
in
(
6
,
10
):
if
c
==
u'N'
:
chrval
=
int
(
systr
[
2
:],
16
)
try
:
if
chrval
>
1114111
:
# sys.maxunicode:
chrval
=
ord
(
unicodedata
.
lookup
(
systr
[
3
:
-
1
]))
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
except
KeyError
:
else
:
s
.
error
(
"Unknown Unicode character name %r"
%
systr
[
3
:
-
1
])
elif
len
(
systr
)
in
(
6
,
10
):
chrval
=
int
(
systr
[
2
:],
16
)
if
chrval
>
1114111
:
# sys.maxunicode:
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
chrval
=
-
1
else
:
else
:
# unicode escapes in byte strings are not unescaped
s
.
error
(
"Invalid unicode escape '%s'"
%
systr
)
chrval
=
None
if
chrval
>=
0
:
chars
.
append_uescape
(
chrval
,
systr
)
chars
.
append_uescape
(
chrval
,
systr
)
else
:
else
:
chars
.
append
(
u'
\
\
'
+
systr
[
1
:])
chars
.
append
(
u'
\
\
'
+
systr
[
1
:])
if
is_python3_source
and
not
has_non_ASCII_literal_characters
\
if
is_python3_source
and
not
has_non_ASCII_literal_characters
\
...
...
tests/run/strliterals.pyx
View file @
2a9d8d45
...
@@ -132,9 +132,9 @@ __doc__ = ur"""
...
@@ -132,9 +132,9 @@ __doc__ = ur"""
>>> len(bytes_uescape)
>>> len(bytes_uescape)
28
28
>>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) ==
3
or
>>> (sys.version_info[0] >= 3 and sys.maxunicode == 1114111 and len(str_uescape) ==
4
or
... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) ==
4
or
... sys.version_info[0] >= 3 and sys.maxunicode == 65535 and len(str_uescape) ==
5
or
... sys.version_info[0] < 3 and len(str_uescape) ==
17
or
... sys.version_info[0] < 3 and len(str_uescape) ==
28
or
... len(str_uescape))
... len(str_uescape))
True
True
>>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
>>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
...
@@ -143,6 +143,10 @@ __doc__ = ur"""
...
@@ -143,6 +143,10 @@ __doc__ = ur"""
True
True
>>> print(str_uescape[-1])
>>> print(str_uescape[-1])
B
B
>>> (sys.version_info[0] >= 3 and ord(str_uescape[-2]) == 0x2603 or
... sys.version_info[0] < 3 and str_uescape[-12:-1] == b'\\N{SNOWMAN}' or
... sys.version_info[0] >= 3 and ord(str_uescape[-2]) or str_uescape[-12:-1])
True
>>> newlines == "Aaa\n"
>>> newlines == "Aaa\n"
True
True
...
@@ -185,7 +189,7 @@ bresc = br'\12\'\"\\'
...
@@ -185,7 +189,7 @@ bresc = br'\12\'\"\\'
uresc
=
ur'\12\'\"\\'
uresc
=
ur'\12\'\"\\'
bytes_uescape
=
b'
\
u1234
\
U12345678
\
u
\
u1
\
u
12
\
uX
'
bytes_uescape
=
b'
\
u1234
\
U12345678
\
u
\
u1
\
u
12
\
uX
'
str_uescape = '
\
u0063
\
U00012345
\
x42
'
str_uescape = '
\
u0063
\
U00012345
\
N
{
SNOWMAN
}
\
x42
'
newlines = "Aaa
\
n
"
newlines = "Aaa
\
n
"
...
...
tests/run/unicodeliterals.pyx
View file @
2a9d8d45
...
@@ -75,6 +75,8 @@ __doc__ = br"""
...
@@ -75,6 +75,8 @@ __doc__ = br"""
True
True
>>> h == u'
\
\
ud800' # unescaped by Python (required by doctest)
>>> h == u'
\
\
ud800' # unescaped by Python (required by doctest)
True
True
>>> k == u'
\
\
N{SNOWMAN}' == u'
\
\
u2603'
True
>>> add == u'Søk ik' + u'üÖä' + 'abc'
>>> add == u'Søk ik' + u'üÖä' + 'abc'
True
True
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
...
@@ -107,6 +109,7 @@ e = u'\x03\x67\xf8\uf8d2Søk ik'
...
@@ -107,6 +109,7 @@ e = u'\x03\x67\xf8\uf8d2Søk ik'
f
=
u'
\
xf8
'
f
=
u'
\
xf8
'
g
=
u'
\
udc00
'
# lone trail surrogate
g
=
u'
\
udc00
'
# lone trail surrogate
h
=
u'
\
ud800
'
# lone lead surrogate
h
=
u'
\
ud800
'
# lone lead surrogate
k
=
u'
\
N{SNOWMAN}
'
add
=
u'Søk ik'
+
u'üÖä'
+
u'abc'
add
=
u'Søk ik'
+
u'üÖä'
+
u'abc'
null
=
u'
\
x00
'
null
=
u'
\
x00
'
...
...
tests/run/unicodeliteralslatin1.pyx
View file @
2a9d8d45
...
@@ -55,6 +55,8 @@ __doc__ = br"""
...
@@ -55,6 +55,8 @@ __doc__ = br"""
True
True
>>> f == u'
\
\
xf8' # unescaped by Python
>>> f == u'
\
\
xf8' # unescaped by Python
True
True
>>> k == u'' == u'
\
\
N{LATIN SMALL LETTER A WITH DIAERESIS}'
True
>>> add == u'Sk ik' + u'' + 'abc'
>>> add == u'Sk ik' + u'' + 'abc'
True
True
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
>>> null == u'
\
\
x00' # unescaped by Python (required by doctest)
...
@@ -75,6 +77,7 @@ c = u'S
...
@@ -75,6 +77,7 @@ c = u'S
d
=
u''
d
=
u''
e
=
u'
\
x03
\
x67
\
xf8
\
uf8d2
Sk ik'
e
=
u'
\
x03
\
x67
\
xf8
\
uf8d2
Sk ik'
f
=
u'
\
xf8
'
f
=
u'
\
xf8
'
k
=
u'
\
N{LATIN SMALL LETTER A WITH DIAERESIS}
'
add
=
u'Sk ik'
+
u''
+
u'abc'
add
=
u'Sk ik'
+
u''
+
u'abc'
null
=
u'
\
x00
'
null
=
u'
\
x00
'
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment