Commit a1f45ec7 authored by Tal Einat, committed by Benjamin Peterson

bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)

This reverts commit 7829bba4.
parent 56a4a3aa
from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase
# Converts a source string into a list of textual representations
# of its tokens, such as:
# ` NAME 'if' (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
result = []
num_lines = len(source_string.splitlines())
missing_trailing_nl = source_string[-1] not in '\r\n'
for type, token, start, end, line in token_generator:
if type == ENDMARKER:
# Ignore the new line on the last line if the input lacks one
if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
type = tok_name[type]
result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
return result
class TokenizeTest(TestCase):
# Tests for the tokenize module.
# The tests can be really simple. Given a small fragment of source
# code, print out a table with tokens. The ENDMARKER, ENCODING and
# final NEWLINE are omitted for brevity.
# code, print out a table with tokens. The ENDMARKER is omitted for
# brevity.
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
# The ENDMARKER is omitted.
result = []
f = StringIO(s)
result = stringify_tokens_from_source(generate_tokens(f.readline), s)
for type, token, start, end, line in generate_tokens(f.readline):
if type == ENDMARKER:
type = tok_name[type]
result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
def test_implicit_newline(self):
# Make sure that the tokenizer emits an implicit NEWLINE token
# when the input lacks a trailing newline: for the input "x",
# the last two tokens must be NEWLINE followed by ENDMARKER.
f = StringIO("x")
tokens = list(generate_tokens(f.readline))
self.assertEqual(tokens[-2][0], NEWLINE)
self.assertEqual(tokens[-1][0], ENDMARKER)
def test_basic(self):
self.check_tokenize("1 + 1", """\
......@@ -638,7 +616,7 @@ class TestRoundtrip(TestCase):
self.check_roundtrip("if x == 1:\n"
" print x\n")
self.check_roundtrip("# This is a comment\n"
"# This also\n")
"# This also")
# Some people use different formatting conventions, which makes
# untokenize a little trickier. Note that this test involves trailing
......@@ -306,15 +306,8 @@ def generate_tokens(readline):
contline = None
indents = [0]
last_line = b''
line = b''
while 1: # loop over lines in stream
# We capture the value of the line variable here because
# readline uses the empty string '' to signal end of input,
# hence `line` itself will always be overwritten at the end
# of this loop.
last_line = line
line = readline()
except StopIteration:
line = ''
......@@ -444,9 +437,6 @@ def generate_tokens(readline):
(lnum, pos), (lnum, pos+1), line)
pos += 1
# Add an implicit NEWLINE if the input doesn't end in one
if last_line and last_line[-1] not in '\r\n':
yield (NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
The tokenize module now implicitly emits a NEWLINE token when provided with
input that does not end with a trailing newline. This behavior now matches what
the C tokenizer does internally. Contributed by Ammar Askar.
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment