bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)

This reverts commit 7829bba4.

bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)
This reverts commit 7829bba4.
a1f45ec7 · Tal Einat · Benjamin Peterson · 56a4a3aa · a1f45ec7 · a1f45ec7
Commit a1f45ec7, authored Oct 24, 2018 by Tal Einat; committed by Benjamin Peterson on Oct 24, 2018.
3 changed files
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
 from test import test_support
-from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
+from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                     STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
 from StringIO import StringIO
 import os
 from unittest import TestCase


-# Converts a source string into a list of textual representation
-# of the tokens such as:
-# `    NAME       'if'          (1, 0) (1, 2)`
-# to make writing tests easier.
-def stringify_tokens_from_source(token_generator, source_string):
-    result = []
-    num_lines = len(source_string.splitlines())
-    missing_trailing_nl = source_string[-1] not in '\r\n'
-
-    for type, token, start, end, line in token_generator:
-        if type == ENDMARKER:
-            break
-        # Ignore the new line on the last line if the input lacks one
-        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
-            continue
-        type = tok_name[type]
-        result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
-                          locals())
-
-    return result
-
 class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
-    # code, print out a table with tokens. The ENDMARKER, ENCODING and
-    # final NEWLINE are omitted for brevity.
+    # code, print out a table with tokens. The ENDMARKER is omitted for
+    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
+        # The ENDMARKER is omitted.
+        result = []
        f = StringIO(s)
-        result = stringify_tokens_from_source(generate_tokens(f.readline), s)
-
+        for type, token, start, end, line in generate_tokens(f.readline):
+            if type == ENDMARKER:
+                break
+            type = tok_name[type]
+            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
+                          locals())
        self.assertEqual(result,
                         expected.rstrip().splitlines())

-    def test_implicit_newline(self):
-        # Make sure that the tokenizer puts in an implicit NEWLINE
-        # when the input lacks a trailing new line.
-        f = StringIO("x")
-        tokens = list(generate_tokens(f.readline))
-        self.assertEqual(tokens[-2][0], NEWLINE)
-        self.assertEqual(tokens[-1][0], ENDMARKER)

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
@@ -638,7 +616,7 @@ class TestRoundtrip(TestCase):
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
-                             "# This also\n")
+                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing

--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -306,15 +306,8 @@ def generate_tokens(readline):
    contline = None
    indents = [0]

-    last_line = b''
-    line = b''
    while 1:                                   # loop over lines in stream
        try:
-            # We capture the value of the line variable here because
-            # readline uses the empty string '' to signal end of input,
-            # hence `line` itself will always be overwritten at the end
-            # of this loop.
-            last_line = line
            line = readline()
        except StopIteration:
            line = ''
@@ -444,9 +437,6 @@ def generate_tokens(readline):
                           (lnum, pos), (lnum, pos+1), line)
                pos += 1

-    # Add an implicit NEWLINE if the input doesn't end in one
-    if last_line and last_line[-1] not in '\r\n':
-        yield (NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

--- a/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
+++ b/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
-Tokenize module now implicitly emits a NEWLINE when provided with input that
-does not have a trailing new line.  This behavior now matches what the C
-tokenizer does internally.  Contributed by Ammar Askar.