Commit 1ff08b12
authored Jan 15, 2001 by Ka-Ping Yee

Add tokenizer support and tests for u'', U"", uR'', Ur"", etc.

parent 534c60f9
Showing 3 changed files with 460 additions and 374 deletions:

    Lib/test/output/test_tokenize   +421  -365
    Lib/test/tokenize_tests.py       +14    -0
    Lib/tokenize.py                  +25    -9
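For context: in Python 2, a u/U prefix marks a Unicode string literal, and it can combine with r/R for raw Unicode strings. Before this commit the tokenizer recognized only the r/R prefixes, so a literal such as uR'abc' was split into a NAME token ('uR') followed by a STRING. A minimal sketch of the new behaviour (Python 2 era; the tokenize.tokenize(readline, tokeneater) signature and its printtoken default are visible in the diff below):

    import tokenize
    from StringIO import StringIO

    src = StringIO("x = uR'abc'\n")
    # printtoken, the default tokeneater, prints each token's type,
    # text, and position; with this commit uR'abc' arrives as one
    # STRING token instead of NAME 'uR' followed by STRING "'abc'".
    tokenize.tokenize(src.readline)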
Lib/test/output/test_tokenize

(This diff is collapsed.)
Lib/test/tokenize_tests.py

@@ -110,6 +110,20 @@ y = r"""foo
 bar \\ baz
 """ + R'''spam
 '''
+x = u'abc' + U'ABC'
+y = u"abc" + U"ABC"
+x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'
+y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"
+x = ur'\\' + UR'\\'
+x = ur'\'' + ''
+y = ur'''
+foo bar \\
+baz''' + UR'''
+foo'''
+y = Ur"""foo
+bar \\ baz
+""" + uR'''spam
+'''
 # Indentation
 if 1:
Lib/tokenize.py

@@ -54,10 +54,10 @@ Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[rR]?'''", '[rR]?"""')
+Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
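group is tokenize.py's small regex combinator: it joins its arguments into a parenthesized alternation. A quick sketch (re-declaring an equivalent group here for illustration) showing that the widened Triple pattern now accepts any u/U and r/R prefix combination:

    import re

    def group(*choices):
        # equivalent to tokenize.py's helper: parenthesized alternation
        return '(' + '|'.join(choices) + ')'

    Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
    for opener in ("'''", "r'''", "u'''", 'uR"""', "Ur'''", 'UR"""'):
        assert re.match(Triple, opener)    # all now recognized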
@@ -74,8 +74,10 @@ PlainToken = group(Number, Funny, String, Name)
 Token = Ignore + PlainToken
 # First (or only) line of ' or " string.
-ContStr = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r'\\\r?\n'),
-                r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n'))
+ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                group("'", r'\\\r?\n'),
+                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
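ContStr matches the first (or only) line of a single-quoted string: either the string closes on this line, or the line ends with a backslash continuation. The widened pattern now lets a u/U prefix through as well. An illustrative check, again with a re-declared group:

    import re

    def group(*choices):
        return '(' + '|'.join(choices) + ')'

    ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                    group("'", r'\\\r?\n'),
                    r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                    group('"', r'\\\r?\n'))

    # A u-prefixed string whose first line ends in a backslash continuation:
    assert re.match(ContStr, "u'abc \\\n")
    # ...and a complete single-line string also matches (ends at the quote):
    assert re.match(ContStr, 'uR"abc"')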
@@ -84,7 +86,14 @@ tokenprog, pseudoprog, single3prog, double3prog = map(
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
-            "R'''": single3prog, 'R"""': double3prog, 'r': None, 'R': None}
+            "u'''": single3prog, 'u"""': double3prog,
+            "ur'''": single3prog, 'ur"""': double3prog,
+            "R'''": single3prog, 'R"""': double3prog,
+            "U'''": single3prog, 'U"""': double3prog,
+            "uR'''": single3prog, 'uR"""': double3prog,
+            "Ur'''": single3prog, 'Ur"""': double3prog,
+            "UR'''": single3prog, 'UR"""': double3prog,
+            'r': None, 'R': None, 'u': None, 'U': None}

 tabsize = 8
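The dict maps each possible opening token to the compiled pattern that scans for the string's end; the bare prefix letters map to None so the fallback chain used later in tokenize() (endprogs[initial] or endprogs[token[1]] or endprogs[token[2]]) can step past prefix characters until it hits the quote. A simplified sketch of that lookup, with toy strings standing in for the compiled patterns:

    # Toy stand-ins for the compiled end-of-string patterns:
    endprogs = {"'": 'SINGLE_END', '"': 'DOUBLE_END',
                'r': None, 'R': None, 'u': None, 'U': None}

    def end_pattern_for(token):
        # Mirrors the fallback chain in tokenize(): try the first,
        # second, then third character of the opening token until one
        # is a quote (the `or` short-circuits on the first hit).
        return (endprogs[token[0]] or endprogs[token[1]] or
                endprogs[token[2]])

    assert end_pattern_for("'x") == 'SINGLE_END'     # plain string
    assert end_pattern_for('u"x') == 'DOUBLE_END'    # one prefix letter
    assert end_pattern_for("uR'x") == 'SINGLE_END'   # two prefix letters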
@@ -172,7 +181,10 @@ def tokenize(readline, tokeneater=printtoken):
             elif initial == '#':
                 tokeneater(COMMENT, token, spos, epos, line)
             elif token in ("'''", '"""',                  # triple-quoted
-                           "r'''", 'r"""', "R'''", 'R"""'):
+                           "r'''", 'r"""', "R'''", 'R"""',
+                           "u'''", 'u"""', "U'''", 'U"""',
+                           "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+                           "uR'''", 'uR"""', "UR'''", 'UR"""'):
                 endprog = endprogs[token]
                 endmatch = endprog.match(line, pos)
                 if endmatch:                           # all on one line
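For a triple-quoted opener, endprogs[token] selects single3prog or double3prog, and endprog.match(line, pos) tests whether the closer sits on the same line. A small sketch using the Single3 tail pattern from the first hunk (the line and pos values here are hypothetical):

    import re

    # Tail-of-''' pattern from the first hunk above:
    Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
    single3prog = re.compile(Single3)

    line = "x = u'''spam''' + 1\n"
    pos = 8                      # position just past the opening u'''
    endmatch = single3prog.match(line, pos)
    assert endmatch and line[pos:endmatch.end()] == "spam'''"  # one line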
@@ -185,10 +197,14 @@ def tokenize(readline, tokeneater=printtoken):
                     contline = line
                     break
             elif initial in ("'", '"') or \
-                token[:2] in ("r'", 'r"', "R'", 'R"'):
+                token[:2] in ("r'", 'r"', "R'", 'R"',
+                              "u'", 'u"', "U'", 'U"') or \
+                token[:3] in ("ur'", 'ur"', "Ur'", 'Ur"',
+                              "uR'", 'uR"', "UR'", 'UR"'):
                 if token[-1] == '\n':                  # continued string
                     strstart = (lnum, start)
-                    endprog = endprogs[initial] or endprogs[token[1]]
+                    endprog = (endprogs[initial] or endprogs[token[1]] or
+                               endprogs[token[2]])
                     contstr, needcont = line[start:], 1
                     contline = line
                     break
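With three-character openers such as ur' now possible, the membership tests need a token[:3] case and the endprogs fallback needs a third step. An illustrative re-statement of the classification logic (hypothetical helper name, not part of the module):

    # Sketch: classifying an unterminated string opener the way the new
    # code does, by its first one, two, or three characters.
    def looks_like_string_start(token):
        initial = token[0]
        return (initial in ("'", '"') or
                token[:2] in ("r'", 'r"', "R'", 'R"',
                              "u'", 'u"', "U'", 'U"') or
                token[:3] in ("ur'", 'ur"', "Ur'", 'Ur"',
                              "uR'", 'uR"', "UR'", 'UR"'))

    assert looks_like_string_start("'abc")
    assert looks_like_string_start('uR"abc')
    assert looks_like_string_start("Ur'abc\n")
    assert not looks_like_string_start("foo")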