Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
a1f45ec7
Commit
a1f45ec7
authored
Oct 24, 2018
by
Tal Einat
Committed by
Benjamin Peterson
Oct 24, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)
This reverts commit
7829bba4
.
parent
56a4a3aa
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
47 deletions
+12
-47
Lib/test/test_tokenize.py
Lib/test/test_tokenize.py
+12
-34
Lib/tokenize.py
Lib/tokenize.py
+0
-10
Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
...S.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
+0
-3
No files found.
Lib/test/test_tokenize.py
View file @
a1f45ec7
from
test
import
test_support
from
test
import
test_support
from
tokenize
import
(
untokenize
,
generate_tokens
,
NUMBER
,
NAME
,
OP
,
NEWLINE
,
from
tokenize
import
(
untokenize
,
generate_tokens
,
NUMBER
,
NAME
,
OP
,
STRING
,
ENDMARKER
,
tok_name
,
Untokenizer
,
tokenize
)
STRING
,
ENDMARKER
,
tok_name
,
Untokenizer
,
tokenize
)
from
StringIO
import
StringIO
from
StringIO
import
StringIO
import
os
import
os
from
unittest
import
TestCase
from
unittest
import
TestCase
# Converts a source string into a list of textual representation
# of the tokens such as:
# ` NAME 'if' (1, 0) (1, 2)`
# to make writing tests easier.
def
stringify_tokens_from_source
(
token_generator
,
source_string
):
result
=
[]
num_lines
=
len
(
source_string
.
splitlines
())
missing_trailing_nl
=
source_string
[
-
1
]
not
in
'
\
r
\
n
'
for
type
,
token
,
start
,
end
,
line
in
token_generator
:
if
type
==
ENDMARKER
:
break
# Ignore the new line on the last line if the input lacks one
if
missing_trailing_nl
and
type
==
NEWLINE
and
end
[
0
]
==
num_lines
:
continue
type
=
tok_name
[
type
]
result
.
append
(
" %(type)-10.10s %(token)-13.13r %(start)s %(end)s"
%
locals
())
return
result
class
TokenizeTest
(
TestCase
):
class
TokenizeTest
(
TestCase
):
# Tests for the tokenize module.
# Tests for the tokenize module.
# The tests can be really simple. Given a small fragment of source
# The tests can be really simple. Given a small fragment of source
# code, print out a table with tokens. The ENDMARKER
, ENCODING and
# code, print out a table with tokens. The ENDMARKER
is omitted for
#
final NEWLINE are omitted for
brevity.
# brevity.
def
check_tokenize
(
self
,
s
,
expected
):
def
check_tokenize
(
self
,
s
,
expected
):
# Format the tokens in s in a table format.
# Format the tokens in s in a table format.
# The ENDMARKER is omitted.
result
=
[]
f
=
StringIO
(
s
)
f
=
StringIO
(
s
)
result
=
stringify_tokens_from_source
(
generate_tokens
(
f
.
readline
),
s
)
for
type
,
token
,
start
,
end
,
line
in
generate_tokens
(
f
.
readline
):
if
type
==
ENDMARKER
:
break
type
=
tok_name
[
type
]
result
.
append
(
" %(type)-10.10s %(token)-13.13r %(start)s %(end)s"
%
locals
())
self
.
assertEqual
(
result
,
self
.
assertEqual
(
result
,
expected
.
rstrip
().
splitlines
())
expected
.
rstrip
().
splitlines
())
def
test_implicit_newline
(
self
):
# Make sure that the tokenizer puts in an implicit NEWLINE
# when the input lacks a trailing new line.
f
=
StringIO
(
"x"
)
tokens
=
list
(
generate_tokens
(
f
.
readline
))
self
.
assertEqual
(
tokens
[
-
2
][
0
],
NEWLINE
)
self
.
assertEqual
(
tokens
[
-
1
][
0
],
ENDMARKER
)
def
test_basic
(
self
):
def
test_basic
(
self
):
self
.
check_tokenize
(
"1 + 1"
,
"""
\
self
.
check_tokenize
(
"1 + 1"
,
"""
\
...
@@ -638,7 +616,7 @@ class TestRoundtrip(TestCase):
...
@@ -638,7 +616,7 @@ class TestRoundtrip(TestCase):
self
.
check_roundtrip
(
"if x == 1:
\
n
"
self
.
check_roundtrip
(
"if x == 1:
\
n
"
" print x
\
n
"
)
" print x
\
n
"
)
self
.
check_roundtrip
(
"# This is a comment
\
n
"
self
.
check_roundtrip
(
"# This is a comment
\
n
"
"# This also
\
n
"
)
"# This also"
)
# Some people use different formatting conventions, which makes
# Some people use different formatting conventions, which makes
# untokenize a little trickier. Note that this test involves trailing
# untokenize a little trickier. Note that this test involves trailing
...
...
Lib/tokenize.py
View file @
a1f45ec7
...
@@ -306,15 +306,8 @@ def generate_tokens(readline):
...
@@ -306,15 +306,8 @@ def generate_tokens(readline):
contline = None
contline = None
indents = [0]
indents = [0]
last_line = b''
line = b''
while 1: # loop over lines in stream
while 1: # loop over lines in stream
try:
try:
# We capture the value of the line variable here because
# readline uses the empty string '' to signal end of input,
# hence `line` itself will always be overwritten at the end
# of this loop.
last_line = line
line = readline()
line = readline()
except StopIteration:
except StopIteration:
line = ''
line = ''
...
@@ -444,9 +437,6 @@ def generate_tokens(readline):
...
@@ -444,9 +437,6 @@ def generate_tokens(readline):
(
lnum
,
pos
),
(
lnum
,
pos
+
1
),
line
)
(
lnum
,
pos
),
(
lnum
,
pos
+
1
),
line
)
pos
+=
1
pos
+=
1
# Add an implicit NEWLINE if the input doesn't end in one
if
last_line
and
last_line
[
-
1
]
not
in
'
\
r
\
n
'
:
yield
(
NEWLINE
,
''
,
(
lnum
-
1
,
len
(
last_line
)),
(
lnum
-
1
,
len
(
last_line
)
+
1
),
''
)
for
indent
in
indents
[
1
:]:
# pop remaining indent levels
for
indent
in
indents
[
1
:]:
# pop remaining indent levels
yield
(
DEDENT
,
''
,
(
lnum
,
0
),
(
lnum
,
0
),
''
)
yield
(
DEDENT
,
''
,
(
lnum
,
0
),
(
lnum
,
0
),
''
)
yield
(
ENDMARKER
,
''
,
(
lnum
,
0
),
(
lnum
,
0
),
''
)
yield
(
ENDMARKER
,
''
,
(
lnum
,
0
),
(
lnum
,
0
),
''
)
...
...
Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
deleted
100644 → 0
View file @
56a4a3aa
Tokenize module now implicitly emits a NEWLINE when provided with input that
does not have a trailing new line. This behavior now matches what the C
tokenizer does internally. Contributed by Ammar Askar.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment