Commit dcfcd146 authored by Guido van Rossum, committed by Łukasz Langa

bpo-35766: Merge typed_ast back into CPython (GH-11645)

parent d97daebf
......@@ -126,16 +126,33 @@ The abstract grammar is currently defined as follows:
Apart from the node classes, the :mod:`ast` module defines these utility functions
and classes for traversing abstract syntax trees:
.. function:: parse(source, filename='<unknown>', mode='exec')
.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False)
Parse the source into an AST node. Equivalent to ``compile(source,
filename, mode, ast.PyCF_ONLY_AST)``.
If ``type_comments=True`` is given, the parser is modified to check
and return type comments as specified by :pep:`484` and :pep:`526`.
This is equivalent to adding :data:`ast.PyCF_TYPE_COMMENTS` to the
flags passed to :func:`compile()`. This will report syntax errors
for misplaced type comments. Without this flag, type comments will
be ignored, and the ``type_comment`` field on selected AST nodes
will always be ``None``. In addition, the locations of
``# type: ignore`` comments will be returned as the ``type_ignores``
attribute of :class:`Module` (otherwise it is always an empty list).
In addition, if ``mode`` is ``'func_type'``, the input syntax is
modified to correspond to :pep:`484` "signature type comments",
e.g. ``(str, int) -> List[str]``.
.. warning::
It is possible to crash the Python interpreter with a
sufficiently large/complex string due to stack depth limitations
in Python's AST compiler.
.. versionchanged:: 3.8
Added ``type_comments=True`` and ``mode='func_type'``.
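A short sketch of the documented behavior (assuming Python 3.8 with this change applied; variable names are illustrative):

import ast

# Default: type comments are ignored and the field stays None.
tree = ast.parse("x = 3  # type: int")
print(tree.body[0].type_comment)   # None

# With type_comments=True, the comment is attached to the Assign node.
tree = ast.parse("x = 3  # type: int", type_comments=True)
print(tree.body[0].type_comment)   # 'int'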
.. function:: literal_eval(node_or_string)
......
......@@ -203,6 +203,10 @@
.. data:: OP
.. data:: TYPE_IGNORE
.. data:: TYPE_COMMENT
.. data:: ERRORTOKEN
.. data:: N_TOKENS
......
......@@ -69,6 +69,13 @@ the :mod:`tokenize` module.
always be an ``ENCODING`` token.
.. data:: TYPE_COMMENT
Token value indicating that a type comment was recognized. Such
tokens are only produced when :func:`ast.parse()` is invoked with
``type_comments=True``.
.. versionchanged:: 3.5
Added :data:`AWAIT` and :data:`ASYNC` tokens.
......@@ -78,3 +85,6 @@ the :mod:`tokenize` module.
.. versionchanged:: 3.7
Removed :data:`AWAIT` and :data:`ASYNC` tokens. "async" and "await" are
now tokenized as :data:`NAME` tokens.
.. versionchanged:: 3.8
Added :data:`TYPE_COMMENT`.
......@@ -7,7 +7,9 @@
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# func_type_input is a PEP 484 Python 2 function type comment
# NB: compound_stmt in single_input is followed by extra NEWLINE!
# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a NEWLINE
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
......@@ -17,14 +19,14 @@ decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
typedargslist: (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
'*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT]]])
| '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
......@@ -39,7 +41,7 @@ simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
[('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
annassign: ':' test ['=' (yield_expr|testlist)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
......@@ -71,13 +73,13 @@ compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
......@@ -150,3 +152,14 @@ encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
# the TYPE_COMMENT in suites is only parsed for funcdefs,
# but can't go elsewhere due to ambiguity
func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
func_type_input: func_type NEWLINE* ENDMARKER
func_type: '(' [typelist] ')' '->' test
# typelist is a modified typedargslist (see above)
typelist: (test (',' test)* [','
['*' [test] (',' test)* [',' '**' test] | '**' test]]
| '*' [test] (',' test)* [',' '**' test] | '**' test)
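Two sketches of what the new productions admit, mirroring the tests added later in this commit (names are illustrative). First, the TYPE_COMMENT slots in typedargslist and func_body_suite allow long-form per-argument comments plus a first-in-body signature comment:

import ast

src = (
    "def f(\n"
    "    a,  # type: int\n"
    "    b,  # type: str\n"
    "):\n"
    "    # type: (...) -> None\n"
    "    pass\n"
)
fn = ast.parse(src, type_comments=True).body[0]
print(fn.args.args[0].type_comment)  # 'int'
print(fn.args.args[1].type_comment)  # 'str'
print(fn.type_comment)               # '(...) -> None'

Second, func_type_input is reachable from Python via mode='func_type':

tree = ast.parse("(int, *str, **Any) -> float", mode="func_type")
print([a.id for a in tree.argtypes])   # ['int', 'str', 'Any']
print(tree.returns.id)                 # 'float'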
......@@ -55,6 +55,8 @@ ELLIPSIS '...'
COLONEQUAL ':='
OP
TYPE_IGNORE
TYPE_COMMENT
ERRORTOKEN
# These aren't used by the C tokenizer but are needed for tokenize.py
......
......@@ -22,6 +22,7 @@ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);
#define PyCF_DONT_IMPLY_DEDENT 0x0200
#define PyCF_ONLY_AST 0x0400
#define PyCF_IGNORE_COOKIE 0x0800
#define PyCF_TYPE_COMMENTS 0x1000
#ifndef Py_LIMITED_API
typedef struct {
......@@ -85,10 +86,10 @@ PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize);
#endif /* !Py_LIMITED_API */
/* These definitions must match corresponding definitions in graminit.h.
There's code in compile.c that checks that they are the same. */
/* These definitions must match corresponding definitions in graminit.h. */
#define Py_single_input 256
#define Py_file_input 257
#define Py_eval_input 258
#define Py_func_type_input 345
#endif /* !Py_COMPILE_H */
......@@ -88,3 +88,7 @@
#define encoding_decl 341
#define yield_expr 342
#define yield_arg 343
#define func_body_suite 344
#define func_type_input 345
#define func_type 346
#define typelist 347
......@@ -37,6 +37,7 @@ typedef struct {
#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
perrdetail *);
......
......@@ -65,8 +65,10 @@ extern "C" {
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define ERRORTOKEN 55
#define N_TOKENS 59
#define TYPE_IGNORE 55
#define TYPE_COMMENT 56
#define ERRORTOKEN 57
#define N_TOKENS 61
#define NT_OFFSET 256
/* Special definitions for cooperation with parser */
......
......@@ -27,12 +27,16 @@
from _ast import *
def parse(source, filename='<unknown>', mode='exec'):
def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
"""
Parse the source into an AST node.
Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
Pass type_comments=True to get back type comments where the syntax allows.
"""
return compile(source, filename, mode, PyCF_ONLY_AST)
flags = PyCF_ONLY_AST
if type_comments:
flags |= PyCF_TYPE_COMMENTS
return compile(source, filename, mode, flags)
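For context, a sketch of what this wrapper expands to (PyCF_TYPE_COMMENTS is re-exported by the ``from _ast import *`` above):

import ast

tree = compile("x = 1  # type: int", "<unknown>", "exec",
               ast.PyCF_ONLY_AST | ast.PyCF_TYPE_COMMENTS)
print(tree.body[0].type_comment)   # 'int'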
def literal_eval(node_or_string):
......
......@@ -100,6 +100,10 @@ comp_if = 340
encoding_decl = 341
yield_expr = 342
yield_arg = 343
func_body_suite = 344
func_type_input = 345
func_type = 346
typelist = 347
#--end constants--
sym_name = {}
......
......@@ -117,7 +117,8 @@ class TestAsdlParser(unittest.TestCase):
v = CustomVisitor()
v.visit(self.types['mod'])
self.assertEqual(v.names_with_seq, ['Module', 'Interactive', 'Suite'])
self.assertEqual(v.names_with_seq,
['Module', 'Module', 'Interactive', 'FunctionType', 'Suite'])
if __name__ == '__main__':
......
import ast
import unittest
funcdef = """\
def foo():
# type: () -> int
pass
def bar(): # type: () -> None
pass
"""
asyncdef = """\
async def foo():
# type: () -> int
return await bar()
async def bar(): # type: () -> int
return await bar()
"""
redundantdef = """\
def foo(): # type: () -> int
# type: () -> str
return ''
"""
nonasciidef = """\
def foo():
# type: () -> àçčéñt
pass
"""
forstmt = """\
for a in []: # type: int
pass
"""
withstmt = """\
with context() as a: # type: int
pass
"""
vardecl = """\
a = 0 # type: int
"""
ignores = """\
def foo():
pass # type: ignore
def bar():
x = 1 # type: ignore
"""
# Test for long-form type-comments in arguments. A test function
# named 'fabvk' would have two positional args, a and b, plus a
# var-arg *v, plus a kw-arg **k. It is verified in test_longargs()
# that it has exactly these arguments, no more, no fewer.
longargs = """\
def fa(
a = 1, # type: A
):
pass
def fa(
a = 1 # type: A
):
pass
def fab(
a, # type: A
b, # type: B
):
pass
def fab(
a, # type: A
b # type: B
):
pass
def fv(
*v, # type: V
):
pass
def fv(
*v # type: V
):
pass
def fk(
**k, # type: K
):
pass
def fk(
**k # type: K
):
pass
def fvk(
*v, # type: V
**k, # type: K
):
pass
def fvk(
*v, # type: V
**k # type: K
):
pass
def fav(
a, # type: A
*v, # type: V
):
pass
def fav(
a, # type: A
*v # type: V
):
pass
def fak(
a, # type: A
**k, # type: K
):
pass
def fak(
a, # type: A
**k # type: K
):
pass
def favk(
a, # type: A
*v, # type: V
**k, # type: K
):
pass
def favk(
a, # type: A
*v, # type: V
**k # type: K
):
pass
"""
class TypeCommentTests(unittest.TestCase):
def parse(self, source):
return ast.parse(source, type_comments=True)
def classic_parse(self, source):
return ast.parse(source)
def test_funcdef(self):
tree = self.parse(funcdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> None")
tree = self.classic_parse(funcdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_asyncdef(self):
tree = self.parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> int")
tree = self.classic_parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_redundantdef(self):
with self.assertRaisesRegex(SyntaxError, "^Cannot have two type comments on def"):
tree = self.parse(redundantdef)
def test_nonasciidef(self):
tree = self.parse(nonasciidef)
self.assertEqual(tree.body[0].type_comment, "() -> àçčéñt")
def test_forstmt(self):
tree = self.parse(forstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(forstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_withstmt(self):
tree = self.parse(withstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(withstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_vardecl(self):
tree = self.parse(vardecl)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(vardecl)
self.assertEqual(tree.body[0].type_comment, None)
def test_ignores(self):
tree = self.parse(ignores)
self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5])
tree = self.classic_parse(ignores)
self.assertEqual(tree.type_ignores, [])
def test_longargs(self):
tree = self.parse(longargs)
for t in tree.body:
# The expected args are encoded in the function name
todo = set(t.name[1:])
self.assertEqual(len(t.args.args),
len(todo) - bool(t.args.vararg) - bool(t.args.kwarg))
self.assertTrue(t.name.startswith('f'), t.name)
for c in t.name[1:]:
todo.remove(c)
if c == 'v':
arg = t.args.vararg
elif c == 'k':
arg = t.args.kwarg
else:
assert 0 <= ord(c) - ord('a') < len(t.args.args)
arg = t.args.args[ord(c) - ord('a')]
self.assertEqual(arg.arg, c) # That's the argument name
self.assertEqual(arg.type_comment, arg.arg.upper())
assert not todo
tree = self.classic_parse(longargs)
for t in tree.body:
for arg in t.args.args + [t.args.vararg, t.args.kwarg]:
if arg is not None:
self.assertIsNone(arg.type_comment, "%s(%s:%r)" %
(t.name, arg.arg, arg.type_comment))
def test_inappropriate_type_comments(self):
"""Tests for inappropriately-placed type comments.
These should be silently ignored with type comments off,
but raise SyntaxError with type comments on.
This is not meant to be exhaustive.
"""
def check_both_ways(source):
ast.parse(source, type_comments=False)
with self.assertRaises(SyntaxError):
ast.parse(source, type_comments=True)
check_both_ways("pass # type: int\n")
check_both_ways("foo() # type: int\n")
check_both_ways("x += 1 # type: int\n")
check_both_ways("while True: # type: int\n continue\n")
check_both_ways("while True:\n continue # type: int\n")
check_both_ways("try: # type: int\n pass\nfinally:\n pass\n")
check_both_ways("try:\n pass\nfinally: # type: int\n pass\n")
def test_func_type_input(self):
def parse_func_type_input(source):
return ast.parse(source, "<unknown>", "func_type")
# Some checks below will crash if the returned structure is wrong
tree = parse_func_type_input("() -> int")
self.assertEqual(tree.argtypes, [])
self.assertEqual(tree.returns.id, "int")
tree = parse_func_type_input("(int) -> List[str]")
self.assertEqual(len(tree.argtypes), 1)
arg = tree.argtypes[0]
self.assertEqual(arg.id, "int")
self.assertEqual(tree.returns.value.id, "List")
self.assertEqual(tree.returns.slice.value.id, "str")
tree = parse_func_type_input("(int, *str, **Any) -> float")
self.assertEqual(tree.argtypes[0].id, "int")
self.assertEqual(tree.argtypes[1].id, "str")
self.assertEqual(tree.argtypes[2].id, "Any")
self.assertEqual(tree.returns.id, "float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, *str, *Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, **str, Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(**int, **str) -> float")
if __name__ == '__main__':
unittest.main()
......@@ -58,12 +58,14 @@ RARROW = 51
ELLIPSIS = 52
COLONEQUAL = 53
OP = 54
TYPE_IGNORE = 55
TYPE_COMMENT = 56
# These aren't used by the C tokenizer but are needed for tokenize.py
ERRORTOKEN = 55
COMMENT = 56
NL = 57
ENCODING = 58
N_TOKENS = 59
ERRORTOKEN = 57
COMMENT = 58
NL = 59
ENCODING = 60
N_TOKENS = 61
# Special definitions for cooperation with parser
NT_OFFSET = 256
......
Add the option to parse PEP 484 type comments in the ast module (off by default). This merges the key functionality of [typed_ast](https://github.com/python/typed_ast), the third-party fork of CPython's parser that first supported them.
\ No newline at end of file
......@@ -663,6 +663,12 @@ validate_node(node *tree)
for (pos = 0; pos < nch; ++pos) {
node *ch = CHILD(tree, pos);
int ch_type = TYPE(ch);
if (ch_type == suite && TYPE(tree) == funcdef) {
/* This is the opposite hack of what we do in parser.c
(search for func_body_suite), except we don't ever
support type comments here. */
ch_type = func_body_suite;
}
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
short a_label = dfa_state->s_arc[arc].a_lbl;
assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
......
......@@ -3,17 +3,20 @@
module Python
{
mod = Module(stmt* body)
mod = Module(stmt* body, type_ignore *type_ignores)
| Interactive(stmt* body)
| Expression(expr body)
| FunctionType(expr* argtypes, expr returns)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| ClassDef(identifier name,
expr* bases,
......@@ -23,18 +26,18 @@ module Python
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| Assign(expr* targets, expr value, string? type_comment)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| With(withitem* items, stmt* body, string? type_comment)
| AsyncWith(withitem* items, stmt* body, string? type_comment)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
......@@ -111,7 +114,7 @@ module Python
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
arg = (identifier arg, expr? annotation, string? type_comment)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
......@@ -121,5 +124,7 @@ module Python
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
type_ignore = TypeIgnore(int lineno)
}
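A sketch of how the new ASDL fields surface from Python (illustrative source; assuming type comments are enabled):

import ast

tree = ast.parse("for x in xs:  # type: int\n    pass\n",
                 type_comments=True)
print(tree.body[0].type_comment)   # 'int' -- the new For field
print(tree.type_ignores)           # []    -- the new Module field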
......@@ -890,6 +890,15 @@ static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
return obj2ast_object(obj, out, arena);
}
static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
{
if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
return 1;
}
return obj2ast_object(obj, out, arena);
}
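The practical effect of this check, sketched from Python: when an AST object is compiled, a string field such as Assign's type_comment passes through obj2ast_string, so a non-str value raises the error above.

import ast

mod = ast.parse("x = 1")
mod.body[0].type_comment = 123     # deliberately not a str
try:
    compile(mod, "<ast>", "exec")
except TypeError as e:
    print(e)                       # AST string must be of type str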
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
int i;
......@@ -993,6 +1002,8 @@ class ASTModuleVisitor(PickleVisitor):
self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
self.emit("return NULL;", 2)
self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
self.emit("return NULL;", 2)
for dfn in mod.dfns:
self.visit(dfn)
self.emit("return m;", 1)
......@@ -1176,18 +1187,19 @@ PyObject* PyAST_mod2obj(mod_ty t)
}
/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
/* and 3 for "func_type" */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
{
mod_ty res;
PyObject *req_type[3];
char *req_name[] = {"Module", "Expression", "Interactive"};
char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
int isinstance;
req_type[0] = (PyObject*)Module_type;
req_type[1] = (PyObject*)Expression_type;
req_type[2] = (PyObject*)Interactive_type;
assert(0 <= mode && mode <= 2);
assert(0 <= mode && mode <= 3);
if (!init_types())
return NULL;
......
......@@ -12,6 +12,7 @@
#include "node.h"
#include "parser.h"
#include "errcode.h"
#include "graminit.h"
#ifdef Py_DEBUG
......@@ -260,7 +261,15 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
dfa *d1;
if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
/* When parsing type comments is not requested,
we can provide better errors about bad indentation
by using 'suite' for the body of a funcdef */
D(printf(" [switch func_body_suite to suite]"));
nt = suite;
}
d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset,
......@@ -268,7 +277,7 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
D(printf(" Push '%s'\n", d1->d_name));
continue;
}
......
......@@ -15,6 +15,42 @@
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
typedef struct {
int *items;
size_t size;
size_t num_items;
} growable_int_array;
static int
growable_int_array_init(growable_int_array *arr, size_t initial_size) {
assert(initial_size > 0);
arr->items = malloc(initial_size * sizeof(*arr->items));
arr->size = initial_size;
arr->num_items = 0;
return arr->items != NULL;
}
static int
growable_int_array_add(growable_int_array *arr, int item) {
if (arr->num_items >= arr->size) {
arr->size *= 2;
arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
if (!arr->items) {
return 0;
}
}
arr->items[arr->num_items] = item;
arr->num_items++;
return 1;
}
static void
growable_int_array_deallocate(growable_int_array *arr) {
free(arr->items);
}
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
......@@ -59,6 +95,9 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
......@@ -127,6 +166,9 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
err_ret->error = E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
......@@ -188,6 +230,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
node *n;
int started = 0;
int col_offset, end_col_offset;
growable_int_array type_ignores;
if (!growable_int_array_init(&type_ignores, 10)) {
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
......@@ -197,6 +246,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
if (*flags & PyPARSE_TYPE_COMMENTS)
ps->p_flags |= PyCF_TYPE_COMMENTS;
#endif
for (;;) {
......@@ -277,6 +328,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
else {
end_col_offset = -1;
}
if (type == TYPE_IGNORE) {
if (!growable_int_array_add(&type_ignores, tok->lineno)) {
err_ret->error = E_NOMEM;
break;
}
continue;
}
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
......@@ -293,6 +353,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
n = ps->p_tree;
ps->p_tree = NULL;
if (n->n_type == file_input) {
/* Put type_ignore nodes in the ENDMARKER of file_input. */
int num;
node *ch;
size_t i;
num = NCH(n);
ch = CHILD(n, num - 1);
REQ(ch, ENDMARKER);
for (i = 0; i < type_ignores.num_items; i++) {
PyNode_AddChild(ch, TYPE_IGNORE, NULL,
type_ignores.items[i], 0,
type_ignores.items[i], 0);
}
}
growable_int_array_deallocate(&type_ignores);
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
......
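From Python, the TYPE_IGNORE nodes stashed in the ENDMARKER surface as the Module's type_ignores list (a sketch with illustrative source):

import ast

src = "def f():\n    pass  # type: ignore\n"
mod = ast.parse(src, type_comments=True)
print([ti.lineno for ti in mod.type_ignores])   # [2]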
......@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
"ELLIPSIS",
"COLONEQUAL",
"OP",
"TYPE_IGNORE",
"TYPE_COMMENT",
"<ERRORTOKEN>",
"<COMMENT>",
"<NL>",
......
......@@ -48,6 +48,10 @@ static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
tokenizing. */
static const char* type_comment_prefix = "# type: ";
/* Create and initialize a new tok_state structure */
static struct tok_state *
......@@ -82,6 +86,7 @@ tok_new(void)
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
tok->type_comments = 0;
return tok;
}
......@@ -1245,11 +1250,61 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Set start of current token */
tok->start = tok->cur - 1;
/* Skip comment */
/* Skip comment, unless it's a type comment */
if (c == '#') {
const char *prefix, *p, *type_start;
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
}
if (tok->type_comments) {
p = tok->start;
prefix = type_comment_prefix;
while (*prefix && p < tok->cur) {
if (*prefix == ' ') {
while (*p == ' ' || *p == '\t') {
p++;
}
} else if (*prefix == *p) {
p++;
} else {
break;
}
prefix++;
}
/* This is a type comment if we matched all of type_comment_prefix. */
if (!*prefix) {
int is_type_ignore = 1;
tok_backup(tok, c); /* don't eat the newline or EOF */
type_start = p;
is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
p += 6;
while (is_type_ignore && p < tok->cur) {
if (*p == '#')
break;
is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
p++;
}
if (is_type_ignore) {
/* If this type ignore is the only thing on the line, consume the newline also. */
if (blankline) {
tok_nextc(tok);
tok->atbol = 1;
}
return TYPE_IGNORE;
} else {
*p_start = (char *) type_start; /* after type_comment_prefix */
*p_end = tok->cur;
return TYPE_COMMENT;
}
}
}
}
/* Check for EOF and errors now */
......
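A rough Python model of the matching above (an illustration, not the C implementation): each space in type_comment_prefix stands for any run of spaces or tabs, and "ignore" followed only by blanks or a second '#' becomes TYPE_IGNORE.

import re

TYPE_PREFIX = re.compile(r"#[ \t]*type:[ \t]*")   # models "# type: "

def classify(comment):
    """Return 'TYPE_IGNORE', ('TYPE_COMMENT', text), or None."""
    m = TYPE_PREFIX.match(comment)
    if m is None:
        return None                      # ordinary comment
    rest = comment[m.end():]
    if rest.startswith("ignore"):
        tail = rest[len("ignore"):].split("#", 1)[0]
        if not tail.strip(" \t"):        # only blanks may follow "ignore"
            return "TYPE_IGNORE"
    return ("TYPE_COMMENT", rest)

print(classify("#type:int"))         # ('TYPE_COMMENT', 'int')
print(classify("# type: ignore"))    # 'TYPE_IGNORE'
print(classify("# just a comment"))  # None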
......@@ -70,6 +70,8 @@ struct tok_state {
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
......
......@@ -765,13 +765,13 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
int compile_mode = -1;
int is_ast;
PyCompilerFlags cf;
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
int start[] = {Py_file_input, Py_eval_input, Py_single_input, Py_func_type_input};
PyObject *result;
cf.cf_flags = flags | PyCF_SOURCE_IS_UTF8;
if (flags &
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST))
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST | PyCF_TYPE_COMMENTS))
{
PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags");
......@@ -795,9 +795,21 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
compile_mode = 1;
else if (strcmp(mode, "single") == 0)
compile_mode = 2;
else if (strcmp(mode, "func_type") == 0) {
if (!(flags & PyCF_ONLY_AST)) {
PyErr_SetString(PyExc_ValueError,
"compile() mode 'func_type' requires flag PyCF_ONLY_AST");
goto error;
}
compile_mode = 3;
}
else {
PyErr_SetString(PyExc_ValueError,
"compile() mode must be 'exec', 'eval' or 'single'");
const char *msg;
if (flags & PyCF_ONLY_AST)
msg = "compile() mode must be 'exec', 'eval', 'single' or 'func_type'";
else
msg = "compile() mode must be 'exec', 'eval' or 'single'";
PyErr_SetString(PyExc_ValueError, msg);
goto error;
}
......
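A sketch of the two branches above from the Python side:

import ast

# 'func_type' only produces an AST, so the flag is required.
ok = compile("() -> int", "<s>", "func_type", ast.PyCF_ONLY_AST)
print(type(ok).__name__)   # FunctionType

try:
    compile("() -> int", "<s>", "func_type")
except ValueError as e:
    print(e)   # compile() mode 'func_type' requires flag PyCF_ONLY_AST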
......@@ -158,6 +158,8 @@ static int PARSER_FLAGS(PyCompilerFlags *flags)
parser_flags |= PyPARSE_IGNORE_COOKIE;
if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL)
parser_flags |= PyPARSE_BARRY_AS_BDFL;
if (flags->cf_flags & PyCF_TYPE_COMMENTS)
parser_flags |= PyPARSE_TYPE_COMMENTS;
return parser_flags;
}
......