Commit dcfcd146 authored by Guido van Rossum's avatar Guido van Rossum Committed by Łukasz Langa

bpo-35766: Merge typed_ast back into CPython (GH-11645)

parent d97daebf
......@@ -126,16 +126,33 @@ The abstract grammar is currently defined as follows:
Apart from the node classes, the :mod:`ast` module defines these utility functions
and classes for traversing abstract syntax trees:
.. function:: parse(source, filename='<unknown>', mode='exec')
.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False)
Parse the source into an AST node. Equivalent to ``compile(source,
filename, mode, ast.PyCF_ONLY_AST)``.
If ``type_comments=True`` is given, the parser is modified to check
and return type comments as specified by :pep:`484` and :pep:`526`.
This is equivalent to adding :data:`ast.PyCF_TYPE_COMMENTS` to the
flags passed to :func:`compile()`. This will report syntax errors
for misplaced type comments. Without this flag, type comments will
be ignored, and the ``type_comment`` field on selected AST nodes
will always be ``None``. In addition, the locations of ``# type:
ignore`` comments will be returned as the ``type_ignores``
attribute of :class:`Module` (otherwise it is always an empty list).
In addition, if ``mode`` is ``'func_type'``, the input syntax is
modified to correspond to :pep:`484` "signature type comments",
e.g. ``(str, int) -> List[str]``.
.. warning::
It is possible to crash the Python interpreter with a
sufficiently large/complex string due to stack depth limitations
in Python's AST compiler.
.. versionchanged:: 3.8
Added ``type_comments=True`` and ``mode='func_type'``.
.. function:: literal_eval(node_or_string)
......@@ -203,6 +203,10 @@
.. data:: OP
.. data:: TYPE_IGNORE
.. data:: TYPE_COMMENT
.. data:: ERRORTOKEN
.. data:: N_TOKENS
......@@ -69,6 +69,13 @@ the :mod:`tokenize` module.
always be an ``ENCODING`` token.
.. data:: TYPE_COMMENT
Token value indicating that a type comment was recognized. Such
tokens are only produced when :func:`ast.parse()` is invoked with
``type_comments=True``.
.. versionchanged:: 3.5
Added :data:`AWAIT` and :data:`ASYNC` tokens.
......@@ -78,3 +85,6 @@ the :mod:`tokenize` module.
.. versionchanged:: 3.7
Removed :data:`AWAIT` and :data:`ASYNC` tokens. "async" and "await" are
now tokenized as :data:`NAME` tokens.
.. versionchanged:: 3.8
Added :data:`TYPE_COMMENT`.
......@@ -7,7 +7,9 @@
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# func_type_input is a PEP 484 Python 2 function type comment
# NB: compound_stmt in single_input is followed by extra NEWLINE!
# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a NEWLINE
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
......@@ -17,14 +19,14 @@ decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
typedargslist: (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
'*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT]]])
| '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
......@@ -39,7 +41,7 @@ simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
[('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
annassign: ':' test ['=' (yield_expr|testlist)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
......@@ -71,13 +73,13 @@ compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
......@@ -150,3 +152,14 @@ encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
# the TYPE_COMMENT in suites is only parsed for funcdefs,
# but can't go elsewhere due to ambiguity
func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
func_type_input: func_type NEWLINE* ENDMARKER
func_type: '(' [typelist] ')' '->' test
# typelist is a modified typedargslist (see above)
typelist: (test (',' test)* [','
['*' [test] (',' test)* [',' '**' test] | '**' test]]
| '*' [test] (',' test)* [',' '**' test] | '**' test)
......@@ -55,6 +55,8 @@ ELLIPSIS '...'
# These aren't used by the C tokenizer but are needed for
This diff is collapsed.
......@@ -22,6 +22,7 @@ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);
#define PyCF_DONT_IMPLY_DEDENT 0x0200
#define PyCF_ONLY_AST 0x0400
#define PyCF_IGNORE_COOKIE 0x0800
#define PyCF_TYPE_COMMENTS 0x1000
#ifndef Py_LIMITED_API
typedef struct {
......@@ -85,10 +86,10 @@ PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize);
#endif /* !Py_LIMITED_API */
/* These definitions must match corresponding definitions in graminit.h.
There's code in compile.c that checks that they are the same. */
/* These definitions must match corresponding definitions in graminit.h. */
#define Py_single_input 256
#define Py_file_input 257
#define Py_eval_input 258
#define Py_func_type_input 345
#endif /* !Py_COMPILE_H */
......@@ -88,3 +88,7 @@
#define encoding_decl 341
#define yield_expr 342
#define yield_arg 343
#define func_body_suite 344
#define func_type_input 345
#define func_type 346
#define typelist 347
......@@ -37,6 +37,7 @@ typedef struct {
#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
perrdetail *);
......@@ -65,8 +65,10 @@ extern "C" {
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define ERRORTOKEN 55
#define N_TOKENS 59
#define TYPE_IGNORE 55
#define TYPE_COMMENT 56
#define ERRORTOKEN 57
#define N_TOKENS 61
#define NT_OFFSET 256
/* Special definitions for cooperation with parser */
......@@ -27,12 +27,16 @@
from _ast import *
def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
    """
    Parse the source into an AST node.
    Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
    Pass type_comments=True to get back type comments where the syntax allows.
    """
    flags = PyCF_ONLY_AST
    if type_comments:
        # Ask the compiler front end to tokenize and attach "# type:" comments.
        flags |= PyCF_TYPE_COMMENTS
    return compile(source, filename, mode, flags)
def literal_eval(node_or_string):
......@@ -100,6 +100,10 @@ comp_if = 340
encoding_decl = 341
yield_expr = 342
yield_arg = 343
func_body_suite = 344
func_type_input = 345
func_type = 346
typelist = 347
#--end constants--
sym_name = {}
......@@ -117,7 +117,8 @@ class TestAsdlParser(unittest.TestCase):
v = CustomVisitor()
self.assertEqual(v.names_with_seq, ['Module', 'Interactive', 'Suite'])
['Module', 'Module', 'Interactive', 'FunctionType', 'Suite'])
if __name__ == '__main__':
This diff is collapsed.
import ast
import unittest
funcdef = """\
def foo():
# type: () -> int
def bar(): # type: () -> None
asyncdef = """\
async def foo():
# type: () -> int
return await bar()
async def bar(): # type: () -> int
return await bar()
redundantdef = """\
def foo(): # type: () -> int
# type: () -> str
return ''
nonasciidef = """\
def foo():
# type: () -> àçčéñt
forstmt = """\
for a in []: # type: int
withstmt = """\
with context() as a: # type: int
vardecl = """\
a = 0 # type: int
ignores = """\
def foo():
pass # type: ignore
def bar():
x = 1 # type: ignore
# Test for long-form type-comments in arguments. A test function
# named 'fabvk' would have two positional args, a and b, plus a
# var-arg *v, plus a kw-arg **k. It is verified in test_longargs()
# that it has exactly these arguments, no more, no fewer.
longargs = """\
def fa(
a = 1, # type: A
def fa(
a = 1 # type: A
def fab(
a, # type: A
b, # type: B
def fab(
a, # type: A
b # type: B
def fv(
*v, # type: V
def fv(
*v # type: V
def fk(
**k, # type: K
def fk(
**k # type: K
def fvk(
*v, # type: V
**k, # type: K
def fvk(
*v, # type: V
**k # type: K
def fav(
a, # type: A
*v, # type: V
def fav(
a, # type: A
*v # type: V
def fak(
a, # type: A
**k, # type: K
def fak(
a, # type: A
**k # type: K
def favk(
a, # type: A
*v, # type: V
**k, # type: K
def favk(
a, # type: A
*v, # type: V
**k # type: K
class TypeCommentTests(unittest.TestCase):
def parse(self, source):
return ast.parse(source, type_comments=True)
def classic_parse(self, source):
return ast.parse(source)
def test_funcdef(self):
tree = self.parse(funcdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> None")
tree = self.classic_parse(funcdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_asyncdef(self):
tree = self.parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> int")
tree = self.classic_parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_redundantdef(self):
with self.assertRaisesRegex(SyntaxError, "^Cannot have two type comments on def"):
tree = self.parse(redundantdef)
def test_nonasciidef(self):
tree = self.parse(nonasciidef)
self.assertEqual(tree.body[0].type_comment, "() -> àçčéñt")
def test_forstmt(self):
tree = self.parse(forstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(forstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_withstmt(self):
tree = self.parse(withstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(withstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_vardecl(self):
tree = self.parse(vardecl)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(vardecl)
self.assertEqual(tree.body[0].type_comment, None)
def test_ignores(self):
tree = self.parse(ignores)
self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5])
tree = self.classic_parse(ignores)
self.assertEqual(tree.type_ignores, [])
def test_longargs(self):
tree = self.parse(longargs)
for t in tree.body:
# The expected args are encoded in the function name
todo = set([1:])
len(todo) - bool(t.args.vararg) - bool(t.args.kwarg))
for c in[1:]:
if c == 'v':
arg = t.args.vararg
elif c == 'k':
arg = t.args.kwarg
assert 0 <= ord(c) - ord('a') < len(t.args.args)
arg = t.args.args[ord(c) - ord('a')]
self.assertEqual(arg.arg, c) # That's the argument name
self.assertEqual(arg.type_comment, arg.arg.upper())
assert not todo
tree = self.classic_parse(longargs)
for t in tree.body:
for arg in t.args.args + [t.args.vararg, t.args.kwarg]:
if arg is not None:
self.assertIsNone(arg.type_comment, "%s(%s:%r)" %
(, arg.arg, arg.type_comment))
def test_inappropriate_type_comments(self):
"""Tests for inappropriately-placed type comments.
These should be silently ignored with type comments off,
but raise SyntaxError with type comments on.
This is not meant to be exhaustive.
def check_both_ways(source):
ast.parse(source, type_comments=False)
with self.assertRaises(SyntaxError):
ast.parse(source, type_comments=True)
check_both_ways("pass # type: int\n")
check_both_ways("foo() # type: int\n")
check_both_ways("x += 1 # type: int\n")
check_both_ways("while True: # type: int\n continue\n")
check_both_ways("while True:\n continue # type: int\n")
check_both_ways("try: # type: int\n pass\nfinally:\n pass\n")
check_both_ways("try:\n pass\nfinally: # type: int\n pass\n")
def test_func_type_input(self):
def parse_func_type_input(source):
return ast.parse(source, "<unknown>", "func_type")
# Some checks below will crash if the returned structure is wrong
tree = parse_func_type_input("() -> int")
self.assertEqual(tree.argtypes, [])
self.assertEqual(, "int")
tree = parse_func_type_input("(int) -> List[str]")
self.assertEqual(len(tree.argtypes), 1)
arg = tree.argtypes[0]
self.assertEqual(, "int")
self.assertEqual(, "List")
self.assertEqual(, "str")
tree = parse_func_type_input("(int, *str, **Any) -> float")
self.assertEqual(tree.argtypes[0].id, "int")
self.assertEqual(tree.argtypes[1].id, "str")
self.assertEqual(tree.argtypes[2].id, "Any")
self.assertEqual(, "float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, *str, *Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, **str, Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(**int, **str) -> float")
if __name__ == '__main__':
......@@ -58,12 +58,14 @@ RARROW = 51
OP = 54
# These aren't used by the C tokenizer but are needed for
NL = 57
NL = 59
# Special definitions for cooperation with parser
Add the option to parse PEP 484 type comments in the ast module. (Off by default.) This is merging the key functionality of the third party fork thereof, [typed_ast](https://github.com/python/typed_ast).
\ No newline at end of file
......@@ -663,6 +663,12 @@ validate_node(node *tree)
for (pos = 0; pos < nch; ++pos) {
node *ch = CHILD(tree, pos);
int ch_type = TYPE(ch);
if (ch_type == suite && TYPE(tree) == funcdef) {
/* This is the opposite hack of what we do in parser.c
(search for func_body_suite), except we don't ever
support type comments here. */
ch_type = func_body_suite;
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
short a_label = dfa_state->s_arc[arc].a_lbl;
assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
......@@ -3,17 +3,20 @@
module Python
mod = Module(stmt* body)
mod = Module(stmt* body, type_ignore *type_ignores)
| Interactive(stmt* body)
| Expression(expr body)
| FunctionType(expr* argtypes, expr returns)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| ClassDef(identifier name,
expr* bases,
......@@ -23,18 +26,18 @@ module Python
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| Assign(expr* targets, expr value, string? type_comment)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| With(withitem* items, stmt* body, string? type_comment)
| AsyncWith(withitem* items, stmt* body, string? type_comment)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
......@@ -111,7 +114,7 @@ module Python
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
arg = (identifier arg, expr? annotation, string? type_comment)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
......@@ -121,5 +124,7 @@ module Python
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
type_ignore = TypeIgnore(int lineno)
......@@ -890,6 +890,15 @@ static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
return obj2ast_object(obj, out, arena);
/* Convert a Python object into an AST "string" field.

   Only exact str or bytes instances are accepted.  Anything else sets
   TypeError and returns 1.  Accepted objects are handed to
   obj2ast_object(), whose result is returned unchanged. */
static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
        PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
        return 1;
    }
    return obj2ast_object(obj, out, arena);
}
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
int i;
......@@ -993,6 +1002,8 @@ class ASTModuleVisitor(PickleVisitor):
self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
self.emit("return NULL;", 2)
self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
self.emit("return NULL;", 2)
for dfn in mod.dfns:
self.emit("return m;", 1)
......@@ -1176,18 +1187,19 @@ PyObject* PyAST_mod2obj(mod_ty t)
/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
/* and 3 for "func_type" */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
mod_ty res;
PyObject *req_type[3];
char *req_name[] = {"Module", "Expression", "Interactive"};
char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
int isinstance;
req_type[0] = (PyObject*)Module_type;
req_type[1] = (PyObject*)Expression_type;
req_type[2] = (PyObject*)Interactive_type;
assert(0 <= mode && mode <= 2);
assert(0 <= mode && mode <= 3);
if (!init_types())
return NULL;
......@@ -12,6 +12,7 @@
#include "node.h"
#include "parser.h"
#include "errcode.h"
#include "graminit.h"
#ifdef Py_DEBUG
......@@ -260,7 +261,15 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
dfa *d1;
if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
/* When parsing type comments is not requested,
we can provide better errors about bad indentation
by using 'suite' for the body of a funcdef */
D(printf(" [switch func_body_suite to suite]"));
nt = suite;
d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset,
......@@ -268,7 +277,7 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
D(printf(" MemError: push\n"));
return err;
D(printf(" Push ...\n"));
D(printf(" Push '%s'\n", d1->d_name));
......@@ -15,6 +15,42 @@
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
/* Minimal append-only vector of ints; parsetok() uses it to collect the
   line numbers of TYPE_IGNORE tokens. */
typedef struct {
    int *items;         /* heap buffer holding the stored values */
    size_t size;        /* capacity of items, in elements */
    size_t num_items;   /* number of elements currently stored */
} growable_int_array;

/* Allocate room for initial_size elements (must be > 0).
   Returns 1 on success, 0 if the allocation failed. */
static int
growable_int_array_init(growable_int_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = malloc(initial_size * sizeof(*arr->items));
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

/* Append item, doubling the capacity when the buffer is full.
   Returns 1 on success and 0 on failure.  On failure the array is left
   untouched, so the caller can still deallocate it. */
static int
growable_int_array_add(growable_int_array *arr, int item) {
    if (arr->num_items >= arr->size) {
        size_t new_size;
        int *new_items;

        /* Refuse to grow if doubling would overflow the byte count. */
        if (arr->size > ((size_t)-1) / (2 * sizeof(*arr->items))) {
            return 0;
        }
        new_size = arr->size * 2;
        /* Use a temporary so the old buffer is not leaked if realloc fails. */
        new_items = realloc(arr->items, new_size * sizeof(*arr->items));
        if (!new_items) {
            return 0;
        }
        arr->items = new_items;
        arr->size = new_size;
    }

    arr->items[arr->num_items] = item;
    arr->num_items++;
    return 1;
}

/* Release the buffer and reset the array to an empty, safe state. */
static void
growable_int_array_deallocate(growable_int_array *arr) {
    free(arr->items);
    arr->items = NULL;
    arr->size = 0;
    arr->num_items = 0;
}
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
......@@ -59,6 +95,9 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
#ifndef PGEN
......@@ -127,6 +166,9 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
err_ret->error = E_NOMEM;
return NULL;
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
#ifndef PGEN
tok->filename = err_ret->filename;
......@@ -188,6 +230,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
node *n;
int started = 0;
int col_offset, end_col_offset;
growable_int_array type_ignores;
if (!growable_int_array_init(&type_ignores, 10)) {
err_ret->error = E_NOMEM;
return NULL;
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
......@@ -197,6 +246,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
ps->p_flags |= PyCF_TYPE_COMMENTS;
for (;;) {
......@@ -277,6 +328,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
else {
end_col_offset = -1;
if (type == TYPE_IGNORE) {
if (!growable_int_array_add(&type_ignores, tok->lineno)) {
err_ret->error = E_NOMEM;
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
......@@ -293,6 +353,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
n = ps->p_tree;
ps->p_tree = NULL;
if (n->n_type == file_input) {
/* Put type_ignore nodes in the ENDMARKER of file_input. */
int num;
node *ch;
size_t i;
num = NCH(n);
ch = CHILD(n, num - 1);
for (i = 0; i < type_ignores.num_items; i++) {
PyNode_AddChild(ch, TYPE_IGNORE, NULL,
type_ignores.items[i], 0,
type_ignores.items[i], 0);
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
......@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
......@@ -48,6 +48,10 @@ static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
tokenizing. */
static const char* type_comment_prefix = "# type: ";
/* Create and initialize a new tok_state structure */
static struct tok_state *
......@@ -82,6 +86,7 @@ tok_new(void)
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
tok->type_comments = 0;
return tok;
......@@ -1245,11 +1250,61 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Set start of current token */
tok->start = tok->cur - 1;
/* Skip comment */
/* Skip comment, unless it's a type comment */
if (c == '#') {
const char *prefix, *p, *type_start;
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
if (tok->type_comments) {
p = tok->start;
prefix = type_comment_prefix;
while (*prefix && p < tok->cur) {
if (*prefix == ' ') {
while (*p == ' ' || *p == '\t') {
} else if (*prefix == *p) {
} else {
/* This is a type comment if we matched all of type_comment_prefix. */
if (!*prefix) {
int is_type_ignore = 1;
tok_backup(tok, c); /* don't eat the newline or EOF */
type_start = p;
is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
p += 6;
while (is_type_ignore && p < tok->cur) {
if (*p == '#')
is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
if (is_type_ignore) {
/* If this type ignore is the only thing on the line, consume the newline also. */
if (blankline) {
tok->atbol = 1;
} else {
*p_start = (char *) type_start; /* after type_comment_prefix */
*p_end = tok->cur;
/* Check for EOF and errors now */
......@@ -70,6 +70,8 @@ struct tok_state {
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */
extern struct tok_state *PyTokenizer_FromString(const char *, int);
This diff is collapsed.
This diff is collapsed.
......@@ -765,13 +765,13 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
int compile_mode = -1;
int is_ast;
PyCompilerFlags cf;
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
int start[] = {Py_file_input, Py_eval_input, Py_single_input, Py_func_type_input};
PyObject *result;
cf.cf_flags = flags | PyCF_SOURCE_IS_UTF8;
if (flags &
"compile(): unrecognised flags");
......@@ -795,9 +795,21 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
compile_mode = 1;
else if (strcmp(mode, "single") == 0)
compile_mode = 2;
else if (strcmp(mode, "func_type") == 0) {
if (!(flags & PyCF_ONLY_AST)) {
"compile() mode 'func_type' requires flag PyCF_ONLY_AST");
goto error;
compile_mode = 3;
else {
"compile() mode must be 'exec', 'eval' or 'single'");
const char *msg;
if (flags & PyCF_ONLY_AST)
msg = "compile() mode must be 'exec', 'eval', 'single' or 'func_type'";
msg = "compile() mode must be 'exec', 'eval' or 'single'";
PyErr_SetString(PyExc_ValueError, msg);
goto error;
This diff is collapsed.
......@@ -158,6 +158,8 @@ static int PARSER_FLAGS(PyCompilerFlags *flags)
parser_flags |= PyPARSE_IGNORE_COOKIE;
if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL)
parser_flags |= PyPARSE_BARRY_AS_BDFL;
if (flags->cf_flags & PyCF_TYPE_COMMENTS)
parser_flags |= PyPARSE_TYPE_COMMENTS;
return parser_flags;
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment