Commit 14c0f03b authored by Meador Inge

Issue #12943: python -m tokenize support has been added to tokenize.

parent 1d972ad1
Doc/library/tokenize.rst
@@ -15,6 +15,9 @@ implemented in Python. The scanner in this module returns comments as tokens
as well, making it useful for implementing "pretty-printers," including
colorizers for on-screen displays.
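
(Illustrative aside, not part of this diff: because the scanner returns
comments as COMMENT tokens, a pretty-printer can pick them out directly)::

   import io
   from tokenize import tokenize, COMMENT

   source = b"x = 1  # set x\n"
   for tok in tokenize(io.BytesIO(source).readline):
       if tok.type == COMMENT:
           print(tok.string)  # -> # set x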
Tokenizing Input
----------------
The primary entry point is a :term:`generator`:

.. function:: tokenize(readline)
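
(Illustrative aside, not part of this diff: a minimal way to drive the
generator named above, assuming in-memory source bytes)::

   import io
   from tokenize import tokenize

   source = b"x = 3.14\n"
   # tokenize() expects a readline callable that returns bytes lines
   for tok in tokenize(io.BytesIO(source).readline):
       print(tok)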
@@ -116,6 +119,26 @@ function it uses to do this is available:

   .. versionadded:: 3.2
.. _tokenize-cli:
Command-Line Usage
------------------
.. versionadded:: 3.3
The :mod:`tokenize` module can be executed as a script from the command line.
It is as simple as:
.. code-block:: sh

   python -m tokenize [filename.py]

If :file:`filename.py` is specified, its contents are tokenized to stdout.
Otherwise, tokenization is performed on stdin.
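
For example (an illustrative invocation, not shown in this diff), source can
be piped in on stdin:

.. code-block:: sh

   echo "pass" | python -m tokenize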
Examples
--------
Example of a script rewriter that transforms float literals into Decimal
objects::

@@ -158,3 +181,37 @@ objects::
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')
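
(Illustrative aside, not part of this diff: the ``untokenize()`` call in the
snippet above rebuilds source bytes from a token stream; a minimal round
trip)::

   import io
   from tokenize import tokenize, untokenize

   source = b"1 + 2\n"
   tokens = list(tokenize(io.BytesIO(source).readline))
   # untokenize() returns bytes, encoded per the stream's ENCODING token
   print(untokenize(tokens).decode('utf-8'))  # -> 1 + 2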
Example of tokenizing from the command line. The script::
    def say_hello():
        print("Hello, World!")

    say_hello()
will be tokenized to the following output where the first column is the range
of the line/column coordinates where the token is found, the second column is
the name of the token, and the final column is the value of the token (if any):
.. code-block:: sh

   $ python -m tokenize hello.py
   0,0-0,0:            ENCODING       'utf-8'
   1,0-1,3:            NAME           'def'
   1,4-1,13:           NAME           'say_hello'
   1,13-1,14:          OP             '('
   1,14-1,15:          OP             ')'
   1,15-1,16:          OP             ':'
   1,16-1,17:          NEWLINE        '\n'
   2,0-2,4:            INDENT         '    '
   2,4-2,9:            NAME           'print'
   2,9-2,10:           OP             '('
   2,10-2,25:          STRING         '"Hello, World!"'
   2,25-2,26:          OP             ')'
   2,26-2,27:          NEWLINE        '\n'
   3,0-3,1:            NL             '\n'
   4,0-4,0:            DEDENT         ''
   4,0-4,9:            NAME           'say_hello'
   4,9-4,10:           OP             '('
   4,10-4,11:          OP             ')'
   4,11-4,12:          NEWLINE        '\n'
   5,0-5,0:            ENDMARKER      ''
Lib/tokenize.py
@@ -530,27 +530,60 @@ def _tokenize(readline, encoding):

def generate_tokens(readline):
    return _tokenize(readline, None)
-if __name__ == "__main__":
-    # Quick sanity check
-    s = b'''def parseline(self, line):
-        """Parse the line into a command name and a string containing
-        the arguments.  Returns a tuple containing (command, args, line).
-        'command' and 'args' may be None if the line couldn't be parsed.
-        """
-        line = line.strip()
-        if not line:
-            return None, None, line
-        elif line[0] == '?':
-            line = 'help ' + line[1:]
-        elif line[0] == '!':
-            if hasattr(self, 'do_shell'):
-                line = 'shell ' + line[1:]
-            else:
-                return None, None, line
-        i, n = 0, len(line)
-        while i < n and line[i] in self.identchars: i = i+1
-        cmd, arg = line[:i], line[i:].strip()
-        return cmd, arg, line
-    '''
-    for tok in tokenize(iter(s.splitlines()).__next__):
-        print(tok)
+def main():
+    import argparse
+
+    # Helper error handling routines
+    def perror(message):
+        print(message, file=sys.stderr)
+
+    def error(message, filename=None, location=None):
+        if location:
+            args = (filename,) + location + (message,)
+            perror("%s:%d:%d: error: %s" % args)
+        elif filename:
+            perror("%s: error: %s" % (filename, message))
+        else:
+            perror("error: %s" % message)
+        sys.exit(1)
+
+    # Parse the arguments and options
+    parser = argparse.ArgumentParser(prog='python -m tokenize')
+    parser.add_argument(dest='filename', nargs='?',
+                        metavar='filename.py',
+                        help='the file to tokenize; defaults to stdin')
+    args = parser.parse_args()
+
+    try:
+        # Tokenize the input
+        if args.filename:
+            filename = args.filename
+            with builtins.open(filename, 'rb') as f:
+                tokens = list(tokenize(f.readline))
+        else:
+            filename = "<stdin>"
+            tokens = _tokenize(sys.stdin.readline, None)
+
+        # Output the tokenization
+        for token in tokens:
+            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+            print("%-20s%-15s%-15r" %
+                  (token_range, tok_name[token.type], token.string))
+    except IndentationError as err:
+        line, column = err.args[1][1:3]
+        error(err.args[0], filename, (line, column))
+    except TokenError as err:
+        line, column = err.args[1]
+        error(err.args[0], filename, (line, column))
+    except SyntaxError as err:
+        error(err, filename)
+    except IOError as err:
+        error(err)
+    except KeyboardInterrupt:
+        print("interrupted\n")
+    except Exception as err:
+        perror("unexpected error: %s" % err)
+        raise
+
+if __name__ == "__main__":
+    main()
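
(Illustrative aside, not part of this diff: the output loop in main() above
can be reproduced with the module's public API; hello.py's contents are
inlined here as bytes)::

   import io
   from tokenize import tokenize, tok_name

   source = b'def say_hello():\n    print("Hello, World!")\n\nsay_hello()\n'
   for token in tokenize(io.BytesIO(source).readline):
       # token.start and token.end are (row, col) pairs; tuple
       # concatenation supplies the four fields of the range column
       token_range = "%d,%d-%d,%d:" % (token.start + token.end)
       print("%-20s%-15s%-15r" %
             (token_range, tok_name[token.type], token.string))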
Misc/NEWS
@@ -2520,6 +2520,8 @@ Core and Builtins

Library
-------
- Issue #12943: python -m tokenize support has been added to tokenize.
- Issue #10465: fix broken delegating of attributes by gzip._PaddedFile.

- Issue #10356: Decimal.__hash__(-1) should return -2.