Commit a95a476b authored by Jason R. Coombs

Issue #20387: Merge test and patch from 3.4.4

parents b9b9e7b4 50373e6c
Lib/test/test_tokenize.py

@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
 code, print out a table with tokens. The ENDMARKER is omitted for
 brevity.
 
+    >>> import glob
+
     >>> dump_tokens("1 + 1")
     ENCODING 'utf-8' (0, 0) (0, 0)
     NUMBER '1' (1, 0) (1, 1)

@@ -835,7 +837,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      open as tokenize_open, Untokenizer)
 from io import BytesIO
 from unittest import TestCase, mock
-import os, sys, glob
+import os
 import token
 
 def dump_tokens(s):

@@ -1427,6 +1429,22 @@ class UntokenizeTest(TestCase):
         self.assertEqual(untokenize(iter(tokens)), b'Hello ')
 
+
+class TestRoundtrip(TestCase):
+
+    def roundtrip(self, code):
+        if isinstance(code, str):
+            code = code.encode('utf-8')
+        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+
+    def test_indentation_semantics_retained(self):
+        """
+        Ensure that although whitespace might be mutated in a roundtrip,
+        the semantic meaning of the indentation remains consistent.
+        """
+        code = "if False:\n\tx=3\n\tx=3\n"
+        codelines = self.roundtrip(code).split('\n')
+        self.assertEqual(codelines[1], codelines[2])
 
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
 def test_main():

@@ -1437,6 +1455,7 @@ def test_main():
     support.run_unittest(TestDetectEncoding)
     support.run_unittest(TestTokenize)
     support.run_unittest(UntokenizeTest)
+    support.run_unittest(TestRoundtrip)
 
 if __name__ == "__main__":
     test_main()
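Note: the new TestRoundtrip.test_indentation_semantics_retained captures the scenario behind issue #20387: a tab-indented block must keep consistent indentation across a tokenize/untokenize round trip. Below is a standalone sketch of what the test asserts; it is not part of the patch, the variable names are illustrative, and the final assertion only holds on an interpreter that includes this fix.

    from io import BytesIO
    from tokenize import tokenize, untokenize

    code = "if False:\n\tx=3\n\tx=3\n"
    # tokenize() needs a readline callable over bytes; untokenize() returns
    # bytes here because the token stream starts with an ENCODING token.
    result = untokenize(tokenize(BytesIO(code.encode('utf-8')).readline)).decode('utf-8')
    lines = result.split('\n')
    # Whitespace may be rewritten during the round trip, but both body lines
    # must come back with identical indentation, preserving the block structure.
    assert lines[1] == lines[2]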
Lib/tokenize.py

@@ -244,6 +244,8 @@ class Untokenizer:
     def untokenize(self, iterable):
         it = iter(iterable)
+        indents = []
+        startline = False
         for t in it:
             if len(t) == 2:
                 self.compat(t, it)

@@ -254,6 +256,21 @@ class Untokenizer:
                 continue
             if tok_type == ENDMARKER:
                 break
+            if tok_type == INDENT:
+                indents.append(token)
+                continue
+            elif tok_type == DEDENT:
+                indents.pop()
+                self.prev_row, self.prev_col = end
+                continue
+            elif tok_type in (NEWLINE, NL):
+                startline = True
+            elif startline and indents:
+                indent = indents[-1]
+                if start[1] >= len(indent):
+                    self.tokens.append(indent)
+                    self.prev_col = len(indent)
+                startline = False
             self.add_whitespace(start)
             self.tokens.append(token)
             self.prev_row, self.prev_col = end
...
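The patched untokenize() keeps a stack of the INDENT token strings it has seen. INDENT and DEDENT tokens are no longer written out through add_whitespace(), which can only pad with spaces; instead, when a new logical line begins inside an indented block and its first token starts at or beyond the stored indent's width, that indent text is re-emitted verbatim, so tab indentation stays tabs. A quick way to observe the effect, assuming an interpreter with this patch applied; the nested-block sample below is illustrative and not taken from the test suite.

    import ast
    from io import BytesIO
    from tokenize import tokenize, untokenize

    src = "if x:\n\tif y:\n\t\ta = 1\n\tb = 2\n"
    out = untokenize(tokenize(BytesIO(src.encode('utf-8')).readline)).decode('utf-8')
    # The regenerated text need not match byte for byte, but it must parse to
    # the same structure: 'b = 2' belongs to the outer 'if', not the inner one.
    assert ast.dump(ast.parse(src)) == ast.dump(ast.parse(out))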
Misc/NEWS

@@ -25,6 +25,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
+  for tab-indented blocks.
+
 - Issue #24456: Fixed possible buffer over-read in adpcm2lin() and lin2adpcm()
   functions of the audioop module.
...