Commit 0d441119 authored by Serhiy Storchaka

Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
parent 806fb254
...@@ -504,6 +504,16 @@ if 1: ...@@ -504,6 +504,16 @@ if 1:
res = script_helper.run_python_until_end(fn)[0] res = script_helper.run_python_until_end(fn)[0]
self.assertIn(b"Non-UTF-8", res.err) self.assertIn(b"Non-UTF-8", res.err)
def test_yet_more_evil_still_undecodable(self):
# Issue #25388
src = b"#\x00\n#\xfd\n"
with tempfile.TemporaryDirectory() as tmpd:
fn = os.path.join(tmpd, "bad.py")
with open(fn, "wb") as fp:
fp.write(src)
res = script_helper.run_python_until_end(fn)[0]
self.assertIn(b"Non-UTF-8", res.err)
@support.cpython_only @support.cpython_only
def test_compiler_recursion_limit(self): def test_compiler_recursion_limit(self):
# Expected limit is sys.getrecursionlimit() * the scaling factor # Expected limit is sys.getrecursionlimit() * the scaling factor
......
...@@ -10,6 +10,9 @@ Release date: tba ...@@ -10,6 +10,9 @@ Release date: tba
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
- Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now - Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now
rejects builtin types with not defined __new__. rejects builtin types with not defined __new__.
......
...@@ -187,7 +187,8 @@ error_ret(struct tok_state *tok) /* XXX */ ...@@ -187,7 +187,8 @@ error_ret(struct tok_state *tok) /* XXX */
tok->decoding_erred = 1; tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
PyMem_FREE(tok->buf); PyMem_FREE(tok->buf);
tok->buf = NULL; tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_DECODE;
return NULL; /* as if it were EOF */ return NULL; /* as if it were EOF */
} }
...@@ -943,11 +944,6 @@ tok_nextc(struct tok_state *tok) ...@@ -943,11 +944,6 @@ tok_nextc(struct tok_state *tok)
} }
buflen = PyBytes_GET_SIZE(u); buflen = PyBytes_GET_SIZE(u);
buf = PyBytes_AS_STRING(u); buf = PyBytes_AS_STRING(u);
if (!buf) {
Py_DECREF(u);
tok->done = E_DECODE;
return EOF;
}
newtok = PyMem_MALLOC(buflen+1); newtok = PyMem_MALLOC(buflen+1);
strcpy(newtok, buf); strcpy(newtok, buf);
Py_DECREF(u); Py_DECREF(u);
...@@ -989,7 +985,6 @@ tok_nextc(struct tok_state *tok) ...@@ -989,7 +985,6 @@ tok_nextc(struct tok_state *tok)
if (tok->buf != NULL) if (tok->buf != NULL)
PyMem_FREE(tok->buf); PyMem_FREE(tok->buf);
tok->buf = newtok; tok->buf = newtok;
tok->line_start = tok->buf;
tok->cur = tok->buf; tok->cur = tok->buf;
tok->line_start = tok->buf; tok->line_start = tok->buf;
tok->inp = strchr(tok->buf, '\0'); tok->inp = strchr(tok->buf, '\0');
...@@ -1012,6 +1007,7 @@ tok_nextc(struct tok_state *tok) ...@@ -1012,6 +1007,7 @@ tok_nextc(struct tok_state *tok)
} }
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
tok) == NULL) { tok) == NULL) {
if (!tok->decoding_erred)
tok->done = E_EOF; tok->done = E_EOF;
done = 1; done = 1;
} }
...@@ -1046,6 +1042,8 @@ tok_nextc(struct tok_state *tok) ...@@ -1046,6 +1042,8 @@ tok_nextc(struct tok_state *tok)
return EOF; return EOF;
} }
tok->buf = newbuf; tok->buf = newbuf;
tok->cur = tok->buf + cur;
tok->line_start = tok->cur;
tok->inp = tok->buf + curvalid; tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize; tok->end = tok->buf + newsize;
tok->start = curstart < 0 ? NULL : tok->start = curstart < 0 ? NULL :
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment