Commit dcfcd146 authored by Guido van Rossum, committed by Łukasz Langa

bpo-35766: Merge typed_ast back into CPython (GH-11645)

parent d97daebf
......@@ -126,16 +126,33 @@ The abstract grammar is currently defined as follows:
Apart from the node classes, the :mod:`ast` module defines these utility functions
and classes for traversing abstract syntax trees:
.. function:: parse(source, filename='<unknown>', mode='exec')
.. function:: parse(source, filename='<unknown>', mode='exec', *, type_comments=False)
Parse the source into an AST node. Equivalent to ``compile(source,
filename, mode, ast.PyCF_ONLY_AST)``.
If ``type_comments=True`` is given, the parser is modified to check
and return type comments as specified by :pep:`484` and :pep:`526`.
This is equivalent to adding :data:`ast.PyCF_TYPE_COMMENTS` to the
flags passed to :func:`compile()`. This will report syntax errors
for misplaced type comments. Without this flag, type comments will
be ignored, and the ``type_comment`` field on selected AST nodes
will always be ``None``. In addition, the locations of
``# type: ignore`` comments will be returned as the ``type_ignores``
attribute of :class:`Module` (otherwise it is always an empty list).
In addition, if ``mode`` is ``'func_type'``, the input syntax is
modified to correspond to :pep:`484` "signature type comments",
e.g. ``(str, int) -> List[str]``.
.. warning::
It is possible to crash the Python interpreter with a
sufficiently large/complex string due to stack depth limitations
in Python's AST compiler.
.. versionchanged:: 3.8
Added ``type_comments=True`` and ``mode='func_type'``.
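A short sketch of the documented behavior (assuming Python 3.8 with this change applied; variable names are illustrative):

import ast

# Default: type comments are ignored and the field stays None.
tree = ast.parse("x = 3  # type: int")
print(tree.body[0].type_comment)   # None

# With type_comments=True, the comment is attached to the Assign node.
tree = ast.parse("x = 3  # type: int", type_comments=True)
print(tree.body[0].type_comment)   # 'int'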
.. function:: literal_eval(node_or_string)
......
......@@ -203,6 +203,10 @@
.. data:: OP
.. data:: TYPE_IGNORE
.. data:: TYPE_COMMENT
.. data:: ERRORTOKEN
.. data:: N_TOKENS
......
......@@ -69,6 +69,13 @@ the :mod:`tokenize` module.
always be an ``ENCODING`` token.
.. data:: TYPE_COMMENT
Token value indicating that a type comment was recognized. Such
tokens are only produced when :func:`ast.parse()` is invoked with
``type_comments=True``.
.. versionchanged:: 3.5
Added :data:`AWAIT` and :data:`ASYNC` tokens.
......@@ -78,3 +85,6 @@ the :mod:`tokenize` module.
.. versionchanged:: 3.7
Removed :data:`AWAIT` and :data:`ASYNC` tokens. "async" and "await" are
now tokenized as :data:`NAME` tokens.
.. versionchanged:: 3.8
Added :data:`TYPE_COMMENT`.
......@@ -7,7 +7,9 @@
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# func_type_input is a PEP 484 Python 2 function type comment
# NB: compound_stmt in single_input is followed by extra NEWLINE!
# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a NEWLINE
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
......@@ -17,14 +19,14 @@ decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
typedargslist: (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
'*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT]]])
| '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
| '**' tfpdef [','] [TYPE_COMMENT])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
......@@ -39,7 +41,7 @@ simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
[('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
annassign: ':' test ['=' (yield_expr|testlist)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
......@@ -71,13 +73,13 @@ compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
......@@ -150,3 +152,14 @@ encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
# the TYPE_COMMENT in suites is only parsed for funcdefs,
# but can't go elsewhere due to ambiguity
func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
func_type_input: func_type NEWLINE* ENDMARKER
func_type: '(' [typelist] ')' '->' test
# typelist is a modified typedargslist (see above)
typelist: (test (',' test)* [','
['*' [test] (',' test)* [',' '**' test] | '**' test]]
| '*' [test] (',' test)* [',' '**' test] | '**' test)
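Two sketches of what the new productions admit, mirroring the tests added later in this commit (names are illustrative). First, the TYPE_COMMENT slots in typedargslist and func_body_suite allow long-form per-argument comments plus a first-in-body signature comment:

import ast

src = (
    "def f(\n"
    "    a,  # type: int\n"
    "    b,  # type: str\n"
    "):\n"
    "    # type: (...) -> None\n"
    "    pass\n"
)
fn = ast.parse(src, type_comments=True).body[0]
print(fn.args.args[0].type_comment)  # 'int'
print(fn.args.args[1].type_comment)  # 'str'
print(fn.type_comment)               # '(...) -> None'

Second, func_type_input is reachable from Python via mode='func_type':

tree = ast.parse("(int, *str, **Any) -> float", mode="func_type")
print([a.id for a in tree.argtypes])   # ['int', 'str', 'Any']
print(tree.returns.id)                 # 'float'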
......@@ -55,6 +55,8 @@ ELLIPSIS '...'
COLONEQUAL ':='
OP
TYPE_IGNORE
TYPE_COMMENT
ERRORTOKEN
# These aren't used by the C tokenizer but are needed for tokenize.py
......
......@@ -22,6 +22,7 @@ PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *);
#define PyCF_DONT_IMPLY_DEDENT 0x0200
#define PyCF_ONLY_AST 0x0400
#define PyCF_IGNORE_COOKIE 0x0800
#define PyCF_TYPE_COMMENTS 0x1000
#ifndef Py_LIMITED_API
typedef struct {
......@@ -85,10 +86,10 @@ PyAPI_FUNC(int) _PyAST_Optimize(struct _mod *, PyArena *arena, int optimize);
#endif /* !Py_LIMITED_API */
/* These definitions must match corresponding definitions in graminit.h.
There's code in compile.c that checks that they are the same. */
/* These definitions must match corresponding definitions in graminit.h. */
#define Py_single_input 256
#define Py_file_input 257
#define Py_eval_input 258
#define Py_func_type_input 345
#endif /* !Py_COMPILE_H */
......@@ -88,3 +88,7 @@
#define encoding_decl 341
#define yield_expr 342
#define yield_arg 343
#define func_body_suite 344
#define func_type_input 345
#define func_type 346
#define typelist 347
......@@ -37,6 +37,7 @@ typedef struct {
#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
perrdetail *);
......
......@@ -65,8 +65,10 @@ extern "C" {
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define ERRORTOKEN 55
#define N_TOKENS 59
#define TYPE_IGNORE 55
#define TYPE_COMMENT 56
#define ERRORTOKEN 57
#define N_TOKENS 61
#define NT_OFFSET 256
/* Special definitions for cooperation with parser */
......
......@@ -27,12 +27,16 @@
from _ast import *
def parse(source, filename='<unknown>', mode='exec'):
def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
"""
Parse the source into an AST node.
Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
Pass type_comments=True to get back type comments where the syntax allows.
"""
return compile(source, filename, mode, PyCF_ONLY_AST)
flags = PyCF_ONLY_AST
if type_comments:
flags |= PyCF_TYPE_COMMENTS
return compile(source, filename, mode, flags)
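For context, a sketch of what this wrapper expands to (PyCF_TYPE_COMMENTS is re-exported by the ``from _ast import *`` above):

import ast

tree = compile("x = 1  # type: int", "<unknown>", "exec",
               ast.PyCF_ONLY_AST | ast.PyCF_TYPE_COMMENTS)
print(tree.body[0].type_comment)   # 'int'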
def literal_eval(node_or_string):
......
......@@ -100,6 +100,10 @@ comp_if = 340
encoding_decl = 341
yield_expr = 342
yield_arg = 343
func_body_suite = 344
func_type_input = 345
func_type = 346
typelist = 347
#--end constants--
sym_name = {}
......
......@@ -117,7 +117,8 @@ class TestAsdlParser(unittest.TestCase):
v = CustomVisitor()
v.visit(self.types['mod'])
self.assertEqual(v.names_with_seq, ['Module', 'Interactive', 'Suite'])
self.assertEqual(v.names_with_seq,
['Module', 'Module', 'Interactive', 'FunctionType', 'Suite'])
if __name__ == '__main__':
......
import ast
import unittest
funcdef = """\
def foo():
# type: () -> int
pass
def bar(): # type: () -> None
pass
"""
asyncdef = """\
async def foo():
# type: () -> int
return await bar()
async def bar(): # type: () -> int
return await bar()
"""
redundantdef = """\
def foo(): # type: () -> int
# type: () -> str
return ''
"""
nonasciidef = """\
def foo():
# type: () -> àçčéñt
pass
"""
forstmt = """\
for a in []: # type: int
pass
"""
withstmt = """\
with context() as a: # type: int
pass
"""
vardecl = """\
a = 0 # type: int
"""
ignores = """\
def foo():
pass # type: ignore
def bar():
x = 1 # type: ignore
"""
# Test for long-form type-comments in arguments. A test function
# named 'fabvk' would have two positional args, a and b, plus a
# var-arg *v, plus a kw-arg **k. It is verified in test_longargs()
# that it has exactly these arguments, no more, no fewer.
longargs = """\
def fa(
a = 1, # type: A
):
pass
def fa(
a = 1 # type: A
):
pass
def fab(
a, # type: A
b, # type: B
):
pass
def fab(
a, # type: A
b # type: B
):
pass
def fv(
*v, # type: V
):
pass
def fv(
*v # type: V
):
pass
def fk(
**k, # type: K
):
pass
def fk(
**k # type: K
):
pass
def fvk(
*v, # type: V
**k, # type: K
):
pass
def fvk(
*v, # type: V
**k # type: K
):
pass
def fav(
a, # type: A
*v, # type: V
):
pass
def fav(
a, # type: A
*v # type: V
):
pass
def fak(
a, # type: A
**k, # type: K
):
pass
def fak(
a, # type: A
**k # type: K
):
pass
def favk(
a, # type: A
*v, # type: V
**k, # type: K
):
pass
def favk(
a, # type: A
*v, # type: V
**k # type: K
):
pass
"""
class TypeCommentTests(unittest.TestCase):
def parse(self, source):
return ast.parse(source, type_comments=True)
def classic_parse(self, source):
return ast.parse(source)
def test_funcdef(self):
tree = self.parse(funcdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> None")
tree = self.classic_parse(funcdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_asyncdef(self):
tree = self.parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, "() -> int")
self.assertEqual(tree.body[1].type_comment, "() -> int")
tree = self.classic_parse(asyncdef)
self.assertEqual(tree.body[0].type_comment, None)
self.assertEqual(tree.body[1].type_comment, None)
def test_redundantdef(self):
with self.assertRaisesRegex(SyntaxError, "^Cannot have two type comments on def"):
tree = self.parse(redundantdef)
def test_nonasciidef(self):
tree = self.parse(nonasciidef)
self.assertEqual(tree.body[0].type_comment, "() -> àçčéñt")
def test_forstmt(self):
tree = self.parse(forstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(forstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_withstmt(self):
tree = self.parse(withstmt)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(withstmt)
self.assertEqual(tree.body[0].type_comment, None)
def test_vardecl(self):
tree = self.parse(vardecl)
self.assertEqual(tree.body[0].type_comment, "int")
tree = self.classic_parse(vardecl)
self.assertEqual(tree.body[0].type_comment, None)
def test_ignores(self):
tree = self.parse(ignores)
self.assertEqual([ti.lineno for ti in tree.type_ignores], [2, 5])
tree = self.classic_parse(ignores)
self.assertEqual(tree.type_ignores, [])
def test_longargs(self):
tree = self.parse(longargs)
for t in tree.body:
# The expected args are encoded in the function name
todo = set(t.name[1:])
self.assertEqual(len(t.args.args),
len(todo) - bool(t.args.vararg) - bool(t.args.kwarg))
self.assertTrue(t.name.startswith('f'), t.name)
for c in t.name[1:]:
todo.remove(c)
if c == 'v':
arg = t.args.vararg
elif c == 'k':
arg = t.args.kwarg
else:
assert 0 <= ord(c) - ord('a') < len(t.args.args)
arg = t.args.args[ord(c) - ord('a')]
self.assertEqual(arg.arg, c) # That's the argument name
self.assertEqual(arg.type_comment, arg.arg.upper())
assert not todo
tree = self.classic_parse(longargs)
for t in tree.body:
for arg in t.args.args + [t.args.vararg, t.args.kwarg]:
if arg is not None:
self.assertIsNone(arg.type_comment, "%s(%s:%r)" %
(t.name, arg.arg, arg.type_comment))
def test_inappropriate_type_comments(self):
"""Tests for inappropriately-placed type comments.
These should be silently ignored with type comments off,
but raise SyntaxError with type comments on.
This is not meant to be exhaustive.
"""
def check_both_ways(source):
ast.parse(source, type_comments=False)
with self.assertRaises(SyntaxError):
ast.parse(source, type_comments=True)
check_both_ways("pass # type: int\n")
check_both_ways("foo() # type: int\n")
check_both_ways("x += 1 # type: int\n")
check_both_ways("while True: # type: int\n continue\n")
check_both_ways("while True:\n continue # type: int\n")
check_both_ways("try: # type: int\n pass\nfinally:\n pass\n")
check_both_ways("try:\n pass\nfinally: # type: int\n pass\n")
def test_func_type_input(self):
def parse_func_type_input(source):
return ast.parse(source, "<unknown>", "func_type")
# Some checks below will crash if the returned structure is wrong
tree = parse_func_type_input("() -> int")
self.assertEqual(tree.argtypes, [])
self.assertEqual(tree.returns.id, "int")
tree = parse_func_type_input("(int) -> List[str]")
self.assertEqual(len(tree.argtypes), 1)
arg = tree.argtypes[0]
self.assertEqual(arg.id, "int")
self.assertEqual(tree.returns.value.id, "List")
self.assertEqual(tree.returns.slice.value.id, "str")
tree = parse_func_type_input("(int, *str, **Any) -> float")
self.assertEqual(tree.argtypes[0].id, "int")
self.assertEqual(tree.argtypes[1].id, "str")
self.assertEqual(tree.argtypes[2].id, "Any")
self.assertEqual(tree.returns.id, "float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, *str, *Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(int, **str, Any) -> float")
with self.assertRaises(SyntaxError):
tree = parse_func_type_input("(**int, **str) -> float")
if __name__ == '__main__':
unittest.main()
......@@ -58,12 +58,14 @@ RARROW = 51
ELLIPSIS = 52
COLONEQUAL = 53
OP = 54
TYPE_IGNORE = 55
TYPE_COMMENT = 56
# These aren't used by the C tokenizer but are needed for tokenize.py
ERRORTOKEN = 55
COMMENT = 56
NL = 57
ENCODING = 58
N_TOKENS = 59
ERRORTOKEN = 57
COMMENT = 58
NL = 59
ENCODING = 60
N_TOKENS = 61
# Special definitions for cooperation with parser
NT_OFFSET = 256
......
Add the option to parse PEP 484 type comments in the ast module (off by default). This merges the key functionality of [typed_ast](https://github.com/python/typed_ast), the third-party fork of CPython's parser that first supported them.
\ No newline at end of file
......@@ -663,6 +663,12 @@ validate_node(node *tree)
for (pos = 0; pos < nch; ++pos) {
node *ch = CHILD(tree, pos);
int ch_type = TYPE(ch);
if (ch_type == suite && TYPE(tree) == funcdef) {
/* This is the opposite hack of what we do in parser.c
(search for func_body_suite), except we don't ever
support type comments here. */
ch_type = func_body_suite;
}
for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
short a_label = dfa_state->s_arc[arc].a_lbl;
assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
......
......@@ -3,17 +3,20 @@
module Python
{
mod = Module(stmt* body)
mod = Module(stmt* body, type_ignore *type_ignores)
| Interactive(stmt* body)
| Expression(expr body)
| FunctionType(expr* argtypes, expr returns)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
stmt* body, expr* decorator_list, expr? returns,
string? type_comment)
| ClassDef(identifier name,
expr* bases,
......@@ -23,18 +26,18 @@ module Python
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| Assign(expr* targets, expr value, string? type_comment)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| With(withitem* items, stmt* body, string? type_comment)
| AsyncWith(withitem* items, stmt* body, string? type_comment)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
......@@ -111,7 +114,7 @@ module Python
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
arg = (identifier arg, expr? annotation, string? type_comment)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
......@@ -121,5 +124,7 @@ module Python
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
type_ignore = TypeIgnore(int lineno)
}
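A sketch of how the new ASDL fields surface from Python (illustrative source; assuming type comments are enabled):

import ast

tree = ast.parse("for x in xs:  # type: int\n    pass\n",
                 type_comments=True)
print(tree.body[0].type_comment)   # 'int' -- the new For field
print(tree.type_ignores)           # []    -- the new Module field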
......@@ -890,6 +890,15 @@ static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
return obj2ast_object(obj, out, arena);
}
static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
{
if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
return 1;
}
return obj2ast_object(obj, out, arena);
}
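The practical effect of this check, sketched from Python: when an AST object is compiled, a string field such as Assign's type_comment passes through obj2ast_string, so a non-str value raises the error above.

import ast

mod = ast.parse("x = 1")
mod.body[0].type_comment = 123     # deliberately not a str
try:
    compile(mod, "<ast>", "exec")
except TypeError as e:
    print(e)                       # AST string must be of type str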
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
int i;
......@@ -993,6 +1002,8 @@ class ASTModuleVisitor(PickleVisitor):
self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
self.emit("return NULL;", 2)
self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
self.emit("return NULL;", 2)
for dfn in mod.dfns:
self.visit(dfn)
self.emit("return m;", 1)
......@@ -1176,18 +1187,19 @@ PyObject* PyAST_mod2obj(mod_ty t)
}
/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
/* and 3 for "func_type" */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
{
mod_ty res;
PyObject *req_type[3];
char *req_name[] = {"Module", "Expression", "Interactive"};
char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
int isinstance;
req_type[0] = (PyObject*)Module_type;
req_type[1] = (PyObject*)Expression_type;
req_type[2] = (PyObject*)Interactive_type;
assert(0 <= mode && mode <= 2);
assert(0 <= mode && mode <= 3);
if (!init_types())
return NULL;
......
......@@ -12,6 +12,7 @@
#include "node.h"
#include "parser.h"
#include "errcode.h"
#include "graminit.h"
#ifdef Py_DEBUG
......@@ -260,7 +261,15 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
dfa *d1;
if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
/* When parsing type comments is not requested,
we can provide better errors about bad indentation
by using 'suite' for the body of a funcdef */
D(printf(" [switch func_body_suite to suite]"));
nt = suite;
}
d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset,
......@@ -268,7 +277,7 @@ PyParser_AddToken(parser_state *ps, int type, char *str,
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
D(printf(" Push '%s'\n", d1->d_name));
continue;
}
......
......@@ -15,6 +15,42 @@
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
typedef struct {
int *items;
size_t size;
size_t num_items;
} growable_int_array;
static int
growable_int_array_init(growable_int_array *arr, size_t initial_size) {
assert(initial_size > 0);
arr->items = malloc(initial_size * sizeof(*arr->items));
arr->size = initial_size;
arr->num_items = 0;
return arr->items != NULL;
}
static int
growable_int_array_add(growable_int_array *arr, int item) {
if (arr->num_items >= arr->size) {
arr->size *= 2;
arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
if (!arr->items) {
return 0;
}
}
arr->items[arr->num_items] = item;
arr->num_items++;
return 1;
}
static void
growable_int_array_deallocate(growable_int_array *arr) {
free(arr->items);
}
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
......@@ -59,6 +95,9 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
......@@ -127,6 +166,9 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
err_ret->error = E_NOMEM;
return NULL;
}
if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
......@@ -188,6 +230,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
node *n;
int started = 0;
int col_offset, end_col_offset;
growable_int_array type_ignores;
if (!growable_int_array_init(&type_ignores, 10)) {
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
......@@ -197,6 +246,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
if (*flags & PyPARSE_TYPE_COMMENTS)
ps->p_flags |= PyCF_TYPE_COMMENTS;
#endif
for (;;) {
......@@ -277,6 +328,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
else {
end_col_offset = -1;
}
if (type == TYPE_IGNORE) {
if (!growable_int_array_add(&type_ignores, tok->lineno)) {
err_ret->error = E_NOMEM;
break;
}
continue;
}
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
lineno, col_offset, tok->lineno, end_col_offset,
......@@ -293,6 +353,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
n = ps->p_tree;
ps->p_tree = NULL;
if (n->n_type == file_input) {
/* Put type_ignore nodes in the ENDMARKER of file_input. */
int num;
node *ch;
size_t i;
num = NCH(n);
ch = CHILD(n, num - 1);
REQ(ch, ENDMARKER);
for (i = 0; i < type_ignores.num_items; i++) {
PyNode_AddChild(ch, TYPE_IGNORE, NULL,
type_ignores.items[i], 0,
type_ignores.items[i], 0);
}
}
growable_int_array_deallocate(&type_ignores);
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
......
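From Python, the TYPE_IGNORE nodes stashed in the ENDMARKER surface as the Module's type_ignores list (a sketch with illustrative source):

import ast

src = "def f():\n    pass  # type: ignore\n"
mod = ast.parse(src, type_comments=True)
print([ti.lineno for ti in mod.type_ignores])   # [2]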
......@@ -61,6 +61,8 @@ const char * const _PyParser_TokenNames[] = {
"ELLIPSIS",
"COLONEQUAL",
"OP",
"TYPE_IGNORE",
"TYPE_COMMENT",
"<ERRORTOKEN>",
"<COMMENT>",
"<NL>",
......
......@@ -48,6 +48,10 @@ static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
tokenizing. */
static const char* type_comment_prefix = "# type: ";
/* Create and initialize a new tok_state structure */
static struct tok_state *
......@@ -82,6 +86,7 @@ tok_new(void)
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
tok->type_comments = 0;
return tok;
}
......@@ -1245,11 +1250,61 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
/* Set start of current token */
tok->start = tok->cur - 1;
/* Skip comment */
/* Skip comment, unless it's a type comment */
if (c == '#') {
const char *prefix, *p, *type_start;
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
}
if (tok->type_comments) {
p = tok->start;
prefix = type_comment_prefix;
while (*prefix && p < tok->cur) {
if (*prefix == ' ') {
while (*p == ' ' || *p == '\t') {
p++;
}
} else if (*prefix == *p) {
p++;
} else {
break;
}
prefix++;
}
/* This is a type comment if we matched all of type_comment_prefix. */
if (!*prefix) {
int is_type_ignore = 1;
tok_backup(tok, c); /* don't eat the newline or EOF */
type_start = p;
is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
p += 6;
while (is_type_ignore && p < tok->cur) {
if (*p == '#')
break;
is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
p++;
}
if (is_type_ignore) {
/* If this type ignore is the only thing on the line, consume the newline also. */
if (blankline) {
tok_nextc(tok);
tok->atbol = 1;
}
return TYPE_IGNORE;
} else {
*p_start = (char *) type_start; /* after type_comment_prefix */
*p_end = tok->cur;
return TYPE_COMMENT;
}
}
}
}
/* Check for EOF and errors now */
......
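A rough Python model of the matching above (an illustration, not the C implementation): each space in type_comment_prefix stands for any run of spaces or tabs, and "ignore" followed only by blanks or a second '#' becomes TYPE_IGNORE.

import re

TYPE_PREFIX = re.compile(r"#[ \t]*type:[ \t]*")   # models "# type: "

def classify(comment):
    """Return 'TYPE_IGNORE', ('TYPE_COMMENT', text), or None."""
    m = TYPE_PREFIX.match(comment)
    if m is None:
        return None                      # ordinary comment
    rest = comment[m.end():]
    if rest.startswith("ignore"):
        tail = rest[len("ignore"):].split("#", 1)[0]
        if not tail.strip(" \t"):        # only blanks may follow "ignore"
            return "TYPE_IGNORE"
    return ("TYPE_COMMENT", rest)

print(classify("#type:int"))         # ('TYPE_COMMENT', 'int')
print(classify("# type: ignore"))    # 'TYPE_IGNORE'
print(classify("# just a comment"))  # None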
......@@ -70,6 +70,8 @@ struct tok_state {
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
......
......@@ -765,13 +765,13 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
int compile_mode = -1;
int is_ast;
PyCompilerFlags cf;
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
int start[] = {Py_file_input, Py_eval_input, Py_single_input, Py_func_type_input};
PyObject *result;
cf.cf_flags = flags | PyCF_SOURCE_IS_UTF8;
if (flags &
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST))
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST | PyCF_TYPE_COMMENTS))
{
PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags");
......@@ -795,9 +795,21 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename,
compile_mode = 1;
else if (strcmp(mode, "single") == 0)
compile_mode = 2;
else if (strcmp(mode, "func_type") == 0) {
if (!(flags & PyCF_ONLY_AST)) {
PyErr_SetString(PyExc_ValueError,
"compile() mode 'func_type' requires flag PyCF_ONLY_AST");
goto error;
}
compile_mode = 3;
}
else {
PyErr_SetString(PyExc_ValueError,
"compile() mode must be 'exec', 'eval' or 'single'");
const char *msg;
if (flags & PyCF_ONLY_AST)
msg = "compile() mode must be 'exec', 'eval', 'single' or 'func_type'";
else
msg = "compile() mode must be 'exec', 'eval' or 'single'";
PyErr_SetString(PyExc_ValueError, msg);
goto error;
}
......
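A sketch of the two branches above from the Python side:

import ast

# 'func_type' only produces an AST, so the flag is required.
ok = compile("() -> int", "<s>", "func_type", ast.PyCF_ONLY_AST)
print(type(ok).__name__)   # FunctionType

try:
    compile("() -> int", "<s>", "func_type")
except ValueError as e:
    print(e)   # compile() mode 'func_type' requires flag PyCF_ONLY_AST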
......@@ -158,6 +158,8 @@ static int PARSER_FLAGS(PyCompilerFlags *flags)
parser_flags |= PyPARSE_IGNORE_COOKIE;
if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL)
parser_flags |= PyPARSE_BARRY_AS_BDFL;
if (flags->cf_flags & PyCF_TYPE_COMMENTS)
parser_flags |= PyPARSE_TYPE_COMMENTS;
return parser_flags;
}
......