Commit c1f5fff2 authored by Walter Dörwald's avatar Walter Dörwald

Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a source file

with a PEP 263 encoding declaration results in long decoded line.
parent d7b76e9f
...@@ -334,7 +334,19 @@ check_bom(int get_char(struct tok_state *), ...@@ -334,7 +334,19 @@ check_bom(int get_char(struct tok_state *),
} }
/* Read a line of text from TOK into S, using the stream in TOK. /* Read a line of text from TOK into S, using the stream in TOK.
Return NULL on failure, else S. */ Return NULL on failure, else S.
On entry, tok->decoding_buffer will be one of:
1) NULL: need to call tok->decoding_readline to get a new line
2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
stored the result in tok->decoding_buffer
3) PyStringObject *: previous call to fp_readl did not have enough room
(in the s buffer) to copy entire contents of the line read
by tok->decoding_readline. tok->decoding_buffer has the overflow.
In this case, fp_readl is called in a loop (with an expanded buffer)
until the buffer ends with a '\n' (or until the end of the file is
reached): see tok_nextc and its calls to decoding_fgets.
*/
static char * static char *
fp_readl(char *s, int size, struct tok_state *tok) fp_readl(char *s, int size, struct tok_state *tok)
...@@ -344,32 +356,45 @@ fp_readl(char *s, int size, struct tok_state *tok) ...@@ -344,32 +356,45 @@ fp_readl(char *s, int size, struct tok_state *tok)
Py_FatalError("fp_readl should not be called in this build."); Py_FatalError("fp_readl should not be called in this build.");
return NULL; /* Keep compiler happy (not reachable) */ return NULL; /* Keep compiler happy (not reachable) */
#else #else
PyObject* utf8; PyObject* utf8 = NULL;
PyObject* buf = tok->decoding_buffer; PyObject* buf = tok->decoding_buffer;
char *str;
int utf8len;
/* Ask for one less byte so we can terminate it */
assert(size > 0);
size--;
if (buf == NULL) { if (buf == NULL) {
/* Ask for one less byte so we can terminate it */ buf = PyObject_CallObject(tok->decoding_readline, NULL);
PyObject *args = Py_BuildValue("(i)", size-1);
if (args == NULL)
return error_ret(tok);
buf = PyObject_Call(tok->decoding_readline, args, NULL);
Py_DECREF(args);
if (buf == NULL) if (buf == NULL)
return error_ret(tok); return error_ret(tok);
} else { } else {
tok->decoding_buffer = NULL; tok->decoding_buffer = NULL;
if (PyString_CheckExact(buf))
utf8 = buf;
} }
utf8 = PyUnicode_AsUTF8String(buf); if (utf8 == NULL) {
Py_DECREF(buf); utf8 = PyUnicode_AsUTF8String(buf);
if (utf8 == NULL) Py_DECREF(buf);
return error_ret(tok); if (utf8 == NULL)
else { return error_ret(tok);
const char* str = PyString_AsString(utf8); }
assert(strlen(str) < (size_t)size); /* XXX */ str = PyString_AsString(utf8);
strcpy(s, str); utf8len = PyString_GET_SIZE(utf8);
Py_DECREF(utf8); if (utf8len > size) {
if (s[0] == '\0') return NULL; /* EOF */ tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size);
return s; if (tok->decoding_buffer == NULL) {
Py_DECREF(utf8);
return error_ret(tok);
}
utf8len = size;
} }
memcpy(s, str, utf8len);
s[utf8len] = '\0';
Py_DECREF(utf8);
if (utf8len == 0) return NULL; /* EOF */
return s;
#endif #endif
} }
...@@ -491,14 +516,7 @@ decoding_feof(struct tok_state *tok) ...@@ -491,14 +516,7 @@ decoding_feof(struct tok_state *tok)
} else { } else {
PyObject* buf = tok->decoding_buffer; PyObject* buf = tok->decoding_buffer;
if (buf == NULL) { if (buf == NULL) {
PyObject *args = PyTuple_New(0); buf = PyObject_CallObject(tok->decoding_readline, NULL);
if (args == NULL) {
error_ret(tok);
return 1;
}
buf = PyObject_Call(tok->decoding_readline,
args, NULL);
Py_DECREF(args);
if (buf == NULL) { if (buf == NULL) {
error_ret(tok); error_ret(tok);
return 1; return 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment