Commit 80c29ac1 authored by Jason R. Coombs

Issue #20387: Merge patch and test

parents 101ff354 d1d628d5
...@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source ...@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for code, print out a table with tokens. The ENDMARKER is omitted for
brevity. brevity.
>>> import glob
>>> dump_tokens("1 + 1") >>> dump_tokens("1 + 1")
ENCODING 'utf-8' (0, 0) (0, 0) ENCODING 'utf-8' (0, 0) (0, 0)
NUMBER '1' (1, 0) (1, 1) NUMBER '1' (1, 0) (1, 1)
...@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, ...@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
open as tokenize_open, Untokenizer) open as tokenize_open, Untokenizer)
from io import BytesIO from io import BytesIO
from unittest import TestCase, mock from unittest import TestCase, mock
import os, sys, glob import os
import token import token
def dump_tokens(s): def dump_tokens(s):
...@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase): ...@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase):
self.assertEqual(untokenize(iter(tokens)), b'Hello ') self.assertEqual(untokenize(iter(tokens)), b'Hello ')
class TestRoundtrip(TestCase):
    """Check source text that is tokenized and then untokenized again."""

    def roundtrip(self, code):
        """Tokenize *code* and return the text that untokenize() rebuilds.

        *code* may be ``str`` or ``bytes``.  A ``str`` is encoded as UTF-8
        first, because the tokenizer is fed from ``BytesIO(code).readline``.
        The value produced by untokenize() is decoded as UTF-8 before it is
        returned, so the caller always gets a ``str`` back.
        """
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        # Both statements belong to the same block, so they must come back
        # with the same leading whitespace.
        self.assertEqual(codelines[1], codelines[2])
        # Equality alone would also hold if the roundtrip stripped the
        # indentation from both lines, which would move them out of the
        # ``if`` block; require that the body is still indented.
        self.assertTrue(
            codelines[1][:1].isspace(),
            "block body lost its indentation in the roundtrip",
        )
__test__ = {"doctests" : doctests, 'decistmt': decistmt} __test__ = {"doctests" : doctests, 'decistmt': decistmt}
def test_main(): def test_main():
...@@ -1237,6 +1255,7 @@ def test_main(): ...@@ -1237,6 +1255,7 @@ def test_main():
support.run_unittest(TestDetectEncoding) support.run_unittest(TestDetectEncoding)
support.run_unittest(TestTokenize) support.run_unittest(TestTokenize)
support.run_unittest(UntokenizeTest) support.run_unittest(UntokenizeTest)
support.run_unittest(TestRoundtrip)
if __name__ == "__main__": if __name__ == "__main__":
test_main() test_main()
...@@ -244,6 +244,8 @@ class Untokenizer: ...@@ -244,6 +244,8 @@ class Untokenizer:
def untokenize(self, iterable): def untokenize(self, iterable):
it = iter(iterable) it = iter(iterable)
indents = []
startline = False
for t in it: for t in it:
if len(t) == 2: if len(t) == 2:
self.compat(t, it) self.compat(t, it)
...@@ -254,6 +256,21 @@ class Untokenizer: ...@@ -254,6 +256,21 @@ class Untokenizer:
continue continue
if tok_type == ENDMARKER: if tok_type == ENDMARKER:
break break
if tok_type == INDENT:
indents.append(token)
continue
elif tok_type == DEDENT:
indents.pop()
self.prev_row, self.prev_col = end
continue
elif tok_type in (NEWLINE, NL):
startline = True
elif startline and indents:
indent = indents[-1]
if start[1] >= len(indent):
self.tokens.append(indent)
self.prev_col = len(indent)
startline = False
self.add_whitespace(start) self.add_whitespace(start)
self.tokens.append(token) self.tokens.append(token)
self.prev_row, self.prev_col = end self.prev_row, self.prev_col = end
......
...@@ -60,6 +60,9 @@ Core and Builtins ...@@ -60,6 +60,9 @@ Core and Builtins
Library Library
------- -------
- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
for tab-indented blocks.
- Issue #24336: The contextmanager decorator now works with functions with - Issue #24336: The contextmanager decorator now works with functions with
keyword arguments called "func" and "self". Patch by Martin Panter. keyword arguments called "func" and "self". Patch by Martin Panter.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment