Commit 995d9b92 authored by Anthony Sottile, committed by INADA Naoki

bpo-16806: Fix `lineno` and `col_offset` for multi-line string tokens (GH-10021)

parent 1cffd0ee
......@@ -683,6 +683,25 @@ class ASTHelpers_Test(unittest.TestCase):
node = ast.parse('async def foo():\n x = "not docstring"')
self.assertIsNone(ast.get_docstring(node.body[0]))
def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
node = ast.parse(
'"""line one\nline two"""\n\n'
'def foo():\n """line one\n line two"""\n\n'
' def bar():\n """line one\n line two"""\n'
' """line one\n line two"""\n'
'"""line one\nline two"""\n\n'
)
self.assertEqual(node.body[0].col_offset, 0)
self.assertEqual(node.body[0].lineno, 1)
self.assertEqual(node.body[1].body[0].col_offset, 2)
self.assertEqual(node.body[1].body[0].lineno, 5)
self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)
self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
self.assertEqual(node.body[1].body[2].col_offset, 2)
self.assertEqual(node.body[1].body[2].lineno, 11)
self.assertEqual(node.body[2].col_offset, 0)
self.assertEqual(node.body[2].lineno, 13)
def test_literal_eval(self):
self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3])
self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42})
......
......@@ -270,10 +270,7 @@ f'{a * x()} {a * x()} {a * x()}'
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
def test_ast_line_numbers_multiline_fstring(self):
# FIXME: This test demonstrates invalid behavior due to JoinedStr's
# immediate child nodes containing the wrong lineno. The enclosed
# expressions have valid line information and column offsets.
# See bpo-16806 and bpo-30465 for details.
# See bpo-30465 for details.
expr = """
a = 10
f'''
......@@ -298,19 +295,16 @@ non-important content
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[2]), ast.Constant)
self.assertEqual(type(t.body[1].value.values[2].value), str)
# NOTE: the following invalid behavior is described in bpo-16806.
# - line number should be the *first* line (3), not the *last* (8)
# - column offset should not be -1
self.assertEqual(t.body[1].lineno, 8)
self.assertEqual(t.body[1].value.lineno, 8)
self.assertEqual(t.body[1].value.values[0].lineno, 8)
self.assertEqual(t.body[1].value.values[1].lineno, 8)
self.assertEqual(t.body[1].value.values[2].lineno, 8)
self.assertEqual(t.body[1].col_offset, -1)
self.assertEqual(t.body[1].value.col_offset, -1)
self.assertEqual(t.body[1].value.values[0].col_offset, -1)
self.assertEqual(t.body[1].value.values[1].col_offset, -1)
self.assertEqual(t.body[1].value.values[2].col_offset, -1)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 3)
self.assertEqual(t.body[1].value.values[2].lineno, 3)
self.assertEqual(t.body[1].col_offset, 0)
self.assertEqual(t.body[1].value.col_offset, 0)
self.assertEqual(t.body[1].value.values[0].col_offset, 0)
self.assertEqual(t.body[1].value.values[1].col_offset, 0)
self.assertEqual(t.body[1].value.values[2].col_offset, 0)
# NOTE: the following lineno information and col_offset is correct for
# expressions within FormattedValues.
binop = t.body[1].value.values[1].value
......@@ -321,8 +315,8 @@ non-important content
self.assertEqual(binop.lineno, 4)
self.assertEqual(binop.left.lineno, 4)
self.assertEqual(binop.right.lineno, 6)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.col_offset, 4)
self.assertEqual(binop.left.col_offset, 4)
self.assertEqual(binop.right.col_offset, 7)
def test_docstring(self):
......
......@@ -27,7 +27,7 @@ class OpcodeTest(unittest.TestCase):
with open(ann_module.__file__) as f:
txt = f.read()
co = compile(txt, ann_module.__file__, 'exec')
self.assertEqual(co.co_firstlineno, 6)
self.assertEqual(co.co_firstlineno, 3)
except OSError:
pass
......
......@@ -117,7 +117,7 @@ class TestLiterals(unittest.TestCase):
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
self.assertEqual(w[0].lineno, 1)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
......@@ -126,7 +126,7 @@ class TestLiterals(unittest.TestCase):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.lineno, 1)
def test_eval_str_raw(self):
self.assertEqual(eval(""" r'x' """), 'x')
......@@ -166,7 +166,7 @@ class TestLiterals(unittest.TestCase):
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
self.assertEqual(w[0].lineno, 1)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
......@@ -175,7 +175,7 @@ class TestLiterals(unittest.TestCase):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.lineno, 1)
def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x')
......
......@@ -1845,3 +1845,4 @@ Gennadiy Zlobin
Doug Zongker
Peter Åstrand
Zheao Li
Carsten Klein
Fix ``lineno`` and ``col_offset`` for multi-line string tokens.
......@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
size_t len;
char *str;
col_offset = -1;
int lineno;
const char *line_start;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
......@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
}
}
#endif
if (a != NULL && a >= tok->line_start) {
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
the starting line number and the column offset right.
(cf. issue 16806) */
lineno = type == STRING ? tok->first_lineno : tok->lineno;
line_start = type == STRING ? tok->multi_line_start : tok->line_start;
if (a != NULL && a >= line_start) {
col_offset = Py_SAFE_DOWNCAST(a - line_start,
intptr_t, int);
}
else {
......@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
tok->lineno, col_offset,
lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
......
......@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
the starting line number and the column offset right.
(cf. issue 16806) */
tok->first_lineno = tok->lineno;
tok->multi_line_start = tok->line_start;
/* Find the quote size and start of string */
c = tok_nextc(tok);
if (c == quote) {
......
......@@ -38,6 +38,8 @@ struct tok_state {
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
const char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int first_lineno; /* First line of a single line or multi line string
expression (cf. issue 16806) */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
#ifndef PGEN
......@@ -58,6 +60,9 @@ struct tok_state {
char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
const char* multi_line_start; /* pointer to start of first line of
a single line or multi line string
expression (cf. issue 16806) */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
......
......@@ -4284,9 +4284,13 @@ fstring_fix_node_location(const node *parent, node *n, char *expr_str)
start--;
}
cols += (int)(substr - start);
/* Fix lineno in multiline strings. */
while ((substr = strchr(substr + 1, '\n')))
lines--;
/* adjust the start based on the number of newlines encountered
before the f-string expression */
for (char* p = parent->n_str; p < substr; p++) {
if (*p == '\n') {
lines++;
}
}
}
}
fstring_shift_node_locations(n, lines, cols);
......
......@@ -204,7 +204,7 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
0,0,114,38,0,0,0,114,39,0,0,0,114,48,0,0,
0,114,10,0,0,0,114,10,0,0,0,114,10,0,0,0,
114,11,0,0,0,114,20,0,0,0,52,0,0,0,115,12,
0,0,0,8,4,4,2,8,8,8,12,8,25,8,13,114,
0,0,0,8,1,4,5,8,8,8,12,8,25,8,13,114,
20,0,0,0,99,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,0,64,0,0,0,115,48,0,0,0,101,0,
90,1,100,0,90,2,100,1,90,3,100,2,100,3,132,0,
......@@ -255,8 +255,8 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
0,0,0,114,3,0,0,0,114,31,0,0,0,114,38,0,
0,0,114,39,0,0,0,114,48,0,0,0,114,10,0,0,
0,114,10,0,0,0,114,10,0,0,0,114,11,0,0,0,
114,49,0,0,0,120,0,0,0,115,10,0,0,0,8,2,
4,2,8,4,8,4,8,5,114,49,0,0,0,99,0,0,
114,49,0,0,0,120,0,0,0,115,10,0,0,0,8,1,
4,3,8,4,8,4,8,5,114,49,0,0,0,99,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,64,0,
0,0,115,36,0,0,0,101,0,90,1,100,0,90,2,100,
1,100,2,132,0,90,3,100,3,100,4,132,0,90,4,100,
......@@ -730,7 +730,7 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
114,123,0,0,0,218,6,115,101,116,116,101,114,114,130,0,
0,0,114,124,0,0,0,114,10,0,0,0,114,10,0,0,
0,114,10,0,0,0,114,11,0,0,0,114,112,0,0,0,
49,1,0,0,115,32,0,0,0,8,35,4,2,4,1,2,
49,1,0,0,115,32,0,0,0,8,1,4,36,4,1,2,
255,12,12,8,10,8,12,2,1,10,8,4,1,10,3,2,
1,10,7,2,1,10,3,4,1,114,112,0,0,0,169,2,
114,113,0,0,0,114,115,0,0,0,99,2,0,0,0,2,
......@@ -1147,7 +1147,7 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
0,0,0,114,169,0,0,0,114,115,0,0,0,114,97,0,
0,0,114,153,0,0,0,114,10,0,0,0,114,10,0,0,
0,114,10,0,0,0,114,11,0,0,0,114,158,0,0,0,
195,2,0,0,115,42,0,0,0,8,7,4,2,2,1,10,
195,2,0,0,115,42,0,0,0,8,2,4,7,2,1,10,
8,2,1,12,8,2,1,12,11,2,1,10,7,2,1,10,
4,2,1,2,1,12,4,2,1,2,1,12,4,2,1,2,
1,12,4,114,158,0,0,0,99,0,0,0,0,0,0,0,
......@@ -1280,7 +1280,7 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
0,114,90,0,0,0,114,168,0,0,0,114,169,0,0,0,
114,115,0,0,0,114,10,0,0,0,114,10,0,0,0,114,
10,0,0,0,114,11,0,0,0,114,172,0,0,0,12,3,
0,0,115,44,0,0,0,8,7,4,2,2,1,10,8,2,
0,0,115,44,0,0,0,8,2,4,7,2,1,10,8,2,
1,12,6,2,1,12,8,2,1,10,3,2,1,10,8,2,
1,10,8,2,1,2,1,12,4,2,1,2,1,12,4,2,
1,2,1,114,172,0,0,0,99,0,0,0,0,0,0,0,
......@@ -1757,8 +1757,8 @@ const unsigned char _Py_M__importlib_bootstrap[] = {
0,114,215,0,0,0,114,218,0,0,0,114,219,0,0,0,
114,223,0,0,0,114,224,0,0,0,114,226,0,0,0,114,
10,0,0,0,114,10,0,0,0,114,10,0,0,0,114,11,
0,0,0,218,8,60,109,111,100,117,108,101,62,8,0,0,
0,115,94,0,0,0,4,17,4,2,8,8,8,8,4,2,
0,0,0,218,8,60,109,111,100,117,108,101,62,1,0,0,
0,115,94,0,0,0,4,24,4,2,8,8,8,8,4,2,
4,3,16,4,14,68,14,21,14,16,8,37,8,17,8,11,
14,8,8,11,8,12,8,16,8,36,14,101,16,26,10,45,
14,72,8,17,8,17,8,30,8,37,8,42,8,15,14,73,
......
......@@ -1137,8 +1137,8 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
0,0,114,127,0,0,0,114,182,0,0,0,114,212,0,0,
0,114,217,0,0,0,114,220,0,0,0,114,3,0,0,0,
114,3,0,0,0,114,3,0,0,0,114,6,0,0,0,114,
208,0,0,0,243,2,0,0,115,10,0,0,0,8,3,4,
2,8,8,8,3,8,8,114,208,0,0,0,99,0,0,0,
208,0,0,0,243,2,0,0,115,10,0,0,0,8,2,4,
3,8,8,8,3,8,8,114,208,0,0,0,99,0,0,0,
0,0,0,0,0,0,0,0,0,3,0,0,0,64,0,0,
0,115,74,0,0,0,101,0,90,1,100,0,90,2,100,1,
100,2,132,0,90,3,100,3,100,4,132,0,90,4,100,5,
......@@ -1236,7 +1236,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
32,32,32,78,114,3,0,0,0,41,3,114,119,0,0,0,
114,44,0,0,0,114,26,0,0,0,114,3,0,0,0,114,
3,0,0,0,114,6,0,0,0,114,225,0,0,0,50,3,
0,0,115,2,0,0,0,0,4,122,21,83,111,117,114,99,
0,0,115,2,0,0,0,0,1,122,21,83,111,117,114,99,
101,76,111,97,100,101,114,46,115,101,116,95,100,97,116,97,
99,2,0,0,0,0,0,0,0,5,0,0,0,10,0,0,
0,67,0,0,0,115,82,0,0,0,124,0,160,0,124,1,
......@@ -1520,7 +1520,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
1,0,0,90,13,95,95,99,108,97,115,115,99,101,108,108,
95,95,114,3,0,0,0,114,3,0,0,0,114,249,0,0,
0,114,6,0,0,0,114,239,0,0,0,160,3,0,0,115,
30,0,0,0,8,3,4,2,8,6,8,4,8,3,2,1,
30,0,0,0,8,2,4,3,8,6,8,4,8,3,2,1,
14,11,2,1,10,4,8,7,2,1,10,5,8,4,8,6,
8,6,114,239,0,0,0,99,0,0,0,0,0,0,0,0,
0,0,0,0,3,0,0,0,64,0,0,0,115,46,0,0,
......@@ -1768,7 +1768,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
114,213,0,0,0,114,229,0,0,0,114,136,0,0,0,114,
179,0,0,0,114,3,0,0,0,114,3,0,0,0,114,3,
0,0,0,114,6,0,0,0,114,15,1,0,0,46,4,0,
0,115,22,0,0,0,8,6,4,2,8,4,8,4,8,3,
0,115,22,0,0,0,8,2,4,6,8,4,8,4,8,3,
8,8,8,6,8,6,8,4,8,4,2,1,114,15,1,0,
0,99,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,64,0,0,0,115,96,0,0,0,101,0,90,1,100,
......@@ -1913,7 +1913,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
114,36,1,0,0,114,37,1,0,0,114,40,1,0,0,114,
186,0,0,0,114,3,0,0,0,114,3,0,0,0,114,3,
0,0,0,114,6,0,0,0,114,22,1,0,0,99,4,0,
0,115,22,0,0,0,8,5,4,2,8,6,8,10,8,4,
0,115,22,0,0,0,8,1,4,6,8,6,8,10,8,4,
8,13,8,3,8,3,8,3,8,3,8,3,114,22,1,0,
0,99,0,0,0,0,0,0,0,0,0,0,0,0,3,0,
0,0,64,0,0,0,115,80,0,0,0,101,0,90,1,100,
......@@ -2462,7 +2462,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
0,114,65,1,0,0,114,207,0,0,0,114,72,1,0,0,
114,37,1,0,0,114,3,0,0,0,114,3,0,0,0,114,
3,0,0,0,114,6,0,0,0,114,57,1,0,0,79,5,
0,0,115,22,0,0,0,8,7,4,2,8,14,8,4,4,
0,0,115,22,0,0,0,8,2,4,7,8,14,8,4,4,
2,8,12,8,5,10,48,8,31,2,1,10,17,114,57,1,
0,0,99,4,0,0,0,0,0,0,0,6,0,0,0,8,
0,0,0,67,0,0,0,115,146,0,0,0,124,0,160,0,
......@@ -2641,8 +2641,8 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
0,114,57,1,0,0,114,77,1,0,0,114,184,0,0,0,
114,85,1,0,0,114,87,1,0,0,114,3,0,0,0,114,
3,0,0,0,114,3,0,0,0,114,6,0,0,0,218,8,
60,109,111,100,117,108,101,62,8,0,0,0,115,126,0,0,
0,4,15,4,1,4,1,2,1,2,255,4,4,8,17,8,
60,109,111,100,117,108,101,62,1,0,0,0,115,126,0,0,
0,4,22,4,1,4,1,2,1,2,255,4,4,8,17,8,
5,8,5,8,6,8,6,8,12,8,10,8,9,8,5,8,
7,8,9,12,22,10,127,0,7,16,1,12,2,4,1,4,
2,6,2,6,2,8,2,18,71,8,40,8,19,8,12,8,
......
......@@ -484,7 +484,7 @@ const unsigned char _Py_M__zipimport[] = {
64,0,0,0,114,65,0,0,0,114,78,0,0,0,114,82,
0,0,0,114,83,0,0,0,114,9,0,0,0,114,9,0,
0,0,114,9,0,0,0,114,10,0,0,0,114,4,0,0,
0,45,0,0,0,115,24,0,0,0,8,13,4,5,8,46,
0,45,0,0,0,115,24,0,0,0,8,1,4,17,8,46,
10,32,10,12,8,10,8,21,8,11,8,26,8,13,8,38,
8,18,122,12,95,95,105,110,105,116,95,95,46,112,121,99,
84,114,60,0,0,0,70,41,3,122,4,46,112,121,99,84,
......@@ -1044,7 +1044,7 @@ const unsigned char _Py_M__zipimport[] = {
34,0,0,0,114,182,0,0,0,114,183,0,0,0,114,184,
0,0,0,114,189,0,0,0,114,9,0,0,0,114,9,0,
0,0,114,9,0,0,0,114,10,0,0,0,114,80,0,0,
0,212,2,0,0,115,14,0,0,0,8,5,4,1,4,2,
0,212,2,0,0,115,14,0,0,0,8,1,4,5,4,2,
8,4,8,9,8,6,8,11,114,80,0,0,0,41,45,114,
84,0,0,0,90,26,95,102,114,111,122,101,110,95,105,109,
112,111,114,116,108,105,98,95,101,120,116,101,114,110,97,108,
......@@ -1065,8 +1065,8 @@ const unsigned char _Py_M__zipimport[] = {
0,0,114,170,0,0,0,114,152,0,0,0,114,150,0,0,
0,114,44,0,0,0,114,80,0,0,0,114,9,0,0,0,
114,9,0,0,0,114,9,0,0,0,114,10,0,0,0,218,
8,60,109,111,100,117,108,101,62,13,0,0,0,115,88,0,
0,0,4,4,8,1,16,1,8,1,8,1,8,1,8,1,
8,60,109,111,100,117,108,101,62,1,0,0,0,115,88,0,
0,0,4,16,8,1,16,1,8,1,8,1,8,1,8,1,
8,1,8,2,8,3,6,1,14,3,16,4,4,2,8,2,
4,1,4,1,4,2,14,127,0,127,0,1,12,1,12,1,
2,1,2,252,4,9,8,4,8,9,8,31,8,126,2,254,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment