Commit 14c0f03b authored by Meador Inge

Issue #12943: python -m tokenize support has been added to tokenize.

parent 1d972ad1
Doc/library/tokenize.rst
@@ -15,6 +15,9 @@ implemented in Python. The scanner in this module returns comments as tokens
as well, making it useful for implementing "pretty-printers," including
colorizers for on-screen displays.
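
(Illustrative aside, not part of this diff: because the scanner returns
comments as COMMENT tokens, a pretty-printer can pick them out directly)::

   import io
   from tokenize import tokenize, COMMENT

   source = b"x = 1  # set x\n"
   for tok in tokenize(io.BytesIO(source).readline):
       if tok.type == COMMENT:
           print(tok.string)  # -> # set x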
Tokenizing Input
----------------
The primary entry point is a :term:`generator`:

.. function:: tokenize(readline)
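
(Illustrative aside, not part of this diff: a minimal way to drive the
generator named above, assuming in-memory source bytes)::

   import io
   from tokenize import tokenize

   source = b"x = 3.14\n"
   # tokenize() expects a readline callable that returns bytes lines
   for tok in tokenize(io.BytesIO(source).readline):
       print(tok)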
@@ -116,6 +119,26 @@ function it uses to do this is available:

   .. versionadded:: 3.2
.. _tokenize-cli:
Command-Line Usage
------------------
.. versionadded:: 3.3
The :mod:`tokenize` module can be executed as a script from the command line.
It is as simple as:
.. code-block:: sh

   python -m tokenize [filename.py]

If :file:`filename.py` is specified, its contents are tokenized to stdout.
Otherwise, tokenization is performed on stdin.
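
For example (an illustrative invocation, not shown in this diff), source can
be piped in on stdin:

.. code-block:: sh

   echo "pass" | python -m tokenize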
Examples
--------
Example of a script rewriter that transforms float literals into Decimal
objects::

@@ -158,3 +181,37 @@ objects::
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')
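
(Illustrative aside, not part of this diff: the ``untokenize()`` call in the
snippet above rebuilds source bytes from a token stream; a minimal round
trip)::

   import io
   from tokenize import tokenize, untokenize

   source = b"1 + 2\n"
   tokens = list(tokenize(io.BytesIO(source).readline))
   # untokenize() returns bytes, encoded per the stream's ENCODING token
   print(untokenize(tokens).decode('utf-8'))  # -> 1 + 2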
Example of tokenizing from the command line. The script::
    def say_hello():
        print("Hello, World!")

    say_hello()
will be tokenized to the following output where the first column is the range
of the line/column coordinates where the token is found, the second column is
the name of the token, and the final column is the value of the token (if any):
.. code-block:: sh

   $ python -m tokenize hello.py
   0,0-0,0:            ENCODING       'utf-8'
   1,0-1,3:            NAME           'def'
   1,4-1,13:           NAME           'say_hello'
   1,13-1,14:          OP             '('
   1,14-1,15:          OP             ')'
   1,15-1,16:          OP             ':'
   1,16-1,17:          NEWLINE        '\n'
   2,0-2,4:            INDENT         '    '
   2,4-2,9:            NAME           'print'
   2,9-2,10:           OP             '('
   2,10-2,25:          STRING         '"Hello, World!"'
   2,25-2,26:          OP             ')'
   2,26-2,27:          NEWLINE        '\n'
   3,0-3,1:            NL             '\n'
   4,0-4,0:            DEDENT         ''
   4,0-4,9:            NAME           'say_hello'
   4,9-4,10:           OP             '('
   4,10-4,11:          OP             ')'
   4,11-4,12:          NEWLINE        '\n'
   5,0-5,0:            ENDMARKER      ''
Lib/tokenize.py
@@ -530,27 +530,60 @@ def _tokenize(readline, encoding):

def generate_tokens(readline):
    return _tokenize(readline, None)
-if __name__ == "__main__":
-    # Quick sanity check
-    s = b'''def parseline(self, line):
-        """Parse the line into a command name and a string containing
-        the arguments.  Returns a tuple containing (command, args, line).
-        'command' and 'args' may be None if the line couldn't be parsed.
-        """
-        line = line.strip()
-        if not line:
-            return None, None, line
-        elif line[0] == '?':
-            line = 'help ' + line[1:]
-        elif line[0] == '!':
-            if hasattr(self, 'do_shell'):
-                line = 'shell ' + line[1:]
-            else:
-                return None, None, line
-        i, n = 0, len(line)
-        while i < n and line[i] in self.identchars: i = i+1
-        cmd, arg = line[:i], line[i:].strip()
-        return cmd, arg, line
-    '''
-    for tok in tokenize(iter(s.splitlines()).__next__):
-        print(tok)
+def main():
+    import argparse
+
+    # Helper error handling routines
+    def perror(message):
+        print(message, file=sys.stderr)
+
+    def error(message, filename=None, location=None):
+        if location:
+            args = (filename,) + location + (message,)
+            perror("%s:%d:%d: error: %s" % args)
+        elif filename:
+            perror("%s: error: %s" % (filename, message))
+        else:
+            perror("error: %s" % message)
+        sys.exit(1)
+
+    # Parse the arguments and options
+    parser = argparse.ArgumentParser(prog='python -m tokenize')
+    parser.add_argument(dest='filename', nargs='?',
+                        metavar='filename.py',
+                        help='the file to tokenize; defaults to stdin')
+    args = parser.parse_args()
+
+    try:
+        # Tokenize the input
+        if args.filename:
+            filename = args.filename
+            with builtins.open(filename, 'rb') as f:
+                tokens = list(tokenize(f.readline))
+        else:
+            filename = "<stdin>"
+            tokens = _tokenize(sys.stdin.readline, None)
+
+        # Output the tokenization
+        for token in tokens:
+            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
+            print("%-20s%-15s%-15r" %
+                  (token_range, tok_name[token.type], token.string))
+    except IndentationError as err:
+        line, column = err.args[1][1:3]
+        error(err.args[0], filename, (line, column))
+    except TokenError as err:
+        line, column = err.args[1]
+        error(err.args[0], filename, (line, column))
+    except SyntaxError as err:
+        error(err, filename)
+    except IOError as err:
+        error(err)
+    except KeyboardInterrupt:
+        print("interrupted\n")
+    except Exception as err:
+        perror("unexpected error: %s" % err)
+        raise
+
+if __name__ == "__main__":
+    main()
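
(Illustrative aside, not part of this diff: the output loop in main() above
can be reproduced with the module's public API; hello.py's contents are
inlined here as bytes)::

   import io
   from tokenize import tokenize, tok_name

   source = b'def say_hello():\n    print("Hello, World!")\n\nsay_hello()\n'
   for token in tokenize(io.BytesIO(source).readline):
       # token.start and token.end are (row, col) pairs; tuple
       # concatenation supplies the four fields of the range column
       token_range = "%d,%d-%d,%d:" % (token.start + token.end)
       print("%-20s%-15s%-15r" %
             (token_range, tok_name[token.type], token.string))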
Misc/NEWS
@@ -2520,6 +2520,8 @@ Core and Builtins

Library
-------
- Issue #12943: python -m tokenize support has been added to tokenize.
- Issue #10465: fix broken delegating of attributes by gzip._PaddedFile.

- Issue #10356: Decimal.__hash__(-1) should return -2.