Commit e36199b4 authored by Benjamin Peterson

fix several compile() issues by translating newlines in the tokenizer

parent c4cd6d37
@@ -173,11 +173,10 @@ available. They are listed here in alphabetical order.
    .. note::
 
-      When compiling a string with multi-line statements, line endings must be
-      represented by a single newline character (``'\n'``), and the input must
-      be terminated by at least one newline character. If line endings are
-      represented by ``'\r\n'``, use :meth:`str.replace` to change them into
-      ``'\n'``.
+      When compiling a string with multi-line statements in ``'single'`` or
+      ``'eval'`` mode, input must be terminated by at least one newline
+      character. This is to facilitate detection of incomplete and complete
+      statements in the :mod:`code` module.
 
    .. versionchanged:: 2.3
       The *flags* and *dont_inherit* arguments were added.
@@ -185,6 +184,10 @@ available. They are listed here in alphabetical order.
    .. versionchanged:: 2.6
       Support for compiling AST objects.
 
+   .. versionchanged:: 2.7
+      Allowed use of Windows and Mac newlines. Also input in ``'exec'`` mode
+      does not have to end in a newline anymore.
+
 .. function:: complex([real[, imag]])
...
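
The documented 2.7 behavior is easy to check interactively. A minimal sketch (the inputs here are illustrative, not taken from the patch; under the old rules each of these calls failed with SyntaxError):

    # The tokenizer now normalizes "\r\n" and "\r" to "\n" and, for
    # 'exec' input, supplies the trailing newline itself.
    compile("x = 1\r\ny = 2", "<example>", "exec")   # Windows line endings
    compile("x = 1\ry = 2", "<example>", "exec")     # classic Mac line endings
    compile("x = 1", "<example>", "exec")            # no trailing newline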
@@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase):
         self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
                              compile("a = 1\n", "def", 'single').co_filename)
 
-    def test_no_universal_newlines(self):
-        code = compile_command("'\rfoo\r'", symbol='eval')
-        self.assertEqual(eval(code), '\rfoo\r')
-
 def test_main():
     run_unittest(CodeopTests)
...
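
The deleted test pinned down the old guarantee that a raw carriage return inside a string literal survived compilation untouched. Newline translation now runs over the whole input before tokenizing, quoted text included, so that guarantee no longer holds. A hedged sketch of the new behavior:

    from codeop import compile_command

    # The raw CRs are normalized to LFs before the string literal is
    # tokenized, exactly the outcome the removed test used to rule out.
    code = compile_command("'\rfoo\r'", symbol='eval')
    assert eval(code) == '\nfoo\n'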
@@ -5,6 +5,19 @@ from test import test_support
 
 class TestSpecifics(unittest.TestCase):
 
+    def test_no_ending_newline(self):
+        compile("hi", "<test>", "exec")
+        compile("hi\r", "<test>", "exec")
+
+    def test_empty(self):
+        compile("", "<test>", "exec")
+
+    def test_other_newlines(self):
+        compile("\r\n", "<test>", "exec")
+        compile("\r", "<test>", "exec")
+        compile("hi\r\nstuff\r\ndef f():\n    pass\r", "<test>", "exec")
+        compile("this_is\rreally_old_mac\rdef f():\n  pass", "<test>", "exec")
+
     def test_debug_assignment(self):
         # catch assignments to __debug__
         self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
...
@@ -243,9 +243,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):
                      (14, '+', 2, 13),
                      (2, '1', 2, 15),
                      (4, '', 2, 16),
-                     (6, '', 2, -1),
-                     (4, '', 2, -1),
-                     (0, '', 2, -1)],
+                     (6, '', 3, -1),
+                     (4, '', 3, -1),
+                     (0, '', 3, -1)],
                     terminals)
...
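
Only the line numbers of the last three terminals change: token 6 is DEDENT, 4 is NEWLINE, and 0 is ENDMARKER, and because 'exec' input now gains an appended newline, these synthesized closing tokens land one line further down. A small probe with the parser module (input chosen for illustration, not the test's own):

    import parser

    st = parser.suite("x = 1")       # single-line input, no trailing newline
    # With line_info enabled, the nested tuple should end with the
    # ENDMARKER terminal (0, '', 2): one line past the last source line.
    print parser.st2tuple(st, True)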
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
 Core and Builtins
 -----------------
 
+- Fix several issues with compile(). The input can now contain Windows and Mac
+  newlines and is no longer required to end in a newline.
+
 - Remove length limitation when constructing a complex number from a
   unicode string.
...
@@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
     initerr(err_ret, filename);
 
-    if ((tok = PyTokenizer_FromString(s)) == NULL) {
+    if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
...
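
The flag passed to PyTokenizer_FromString() is simply "start == file_input", so only whole-module ('exec') parsing gets the implicit trailing newline; 'eval' and 'single' starts are unaffected by that part of the change. At the Python level (a sketch; the 'single' example keeps its explicit newlines, as the documentation note above still requires):

    compile("x = 1", "<s>", "exec")                   # newline supplied internally
    compile("if 1:\n    x = 1\n\n", "<s>", "single")  # still newline-terminated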
@@ -105,6 +105,7 @@ tok_new(void)
     tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
+    tok->input = NULL;
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
@@ -130,6 +131,17 @@ tok_new(void)
     return tok;
 }
 
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+    char* result = (char *)PyMem_MALLOC(len + 1);
+    if (result != NULL) {
+        memcpy(result, s, len);
+        result[len] = '\0';
+    }
+    return result;
+}
+
 #ifdef PGEN
 
 static char *
@@ -144,10 +156,10 @@ decoding_feof(struct tok_state *tok)
     return feof(tok->fp);
 }
 
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return str;
+    return new_string(str, strlen(str));
 }
 
 #else /* PGEN */
@@ -162,16 +174,6 @@ error_ret(struct tok_state *tok) /* XXX */
     return NULL; /* as if it were EOF */
 }
 
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
-    }
-    return result;
-}
-
 static char *
 get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -586,17 +588,63 @@ translate_into_utf8(const char* str, const char* enc) {
 }
 #endif
 
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+    int skip_next_lf = 0, length = strlen(s), final_length;
+    char *buf, *current;
+    char c;
+    buf = PyMem_MALLOC(length + 2);
+    if (buf == NULL) {
+        tok->done = E_NOMEM;
+        return NULL;
+    }
+    for (current = buf; (c = *s++);) {
+        if (skip_next_lf) {
+            skip_next_lf = 0;
+            if (c == '\n') {
+                c = *s;
+                s++;
+                if (!c)
+                    break;
+            }
+        }
+        if (c == '\r') {
+            skip_next_lf = 1;
+            c = '\n';
+        }
+        *current = c;
+        current++;
+    }
+    /* If this is exec input, add a newline to the end of the file if
+       there isn't one already. */
+    if (exec_input && *current != '\n') {
+        *current = '\n';
+        current++;
+    }
+    *current = '\0';
+    final_length = current - buf;
+    if (final_length < length && final_length)
+        /* should never fail */
+        buf = PyMem_REALLOC(buf, final_length + 1);
+    return buf;
+}
+
 /* Decode a byte string STR for use as the buffer of TOK.
    Look for encoding declarations inside STR, and record them
    inside TOK. */
 
 static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
+    const char *str;
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;
+
+    tok->input = str = translate_newlines(input, single, tok);
+    if (str == NULL)
+        return NULL;
     tok->enc = NULL;
     tok->str = str;
     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -651,12 +699,12 @@ decode_str(const char *str, struct tok_state *tok)
 /* Set up tokenizer for string */
 
 struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-    str = (char *)decode_str(str, tok);
+    str = (char *)decode_str(str, exec_input, tok);
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
@@ -702,6 +750,8 @@ PyTokenizer_Free(struct tok_state *tok)
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
+    if (tok->input)
+        PyMem_FREE((char *)tok->input);
     PyMem_FREE(tok);
 }
...
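
translate_newlines() is the heart of the change: decode_str() now hands the tokenizer a private, normalized copy of the source, kept in tok->input so PyTokenizer_Free() can release it. A rough Python equivalent of the translation logic, offered as a sketch only (the real work happens in the C above):

    def translate_newlines(s, exec_input):
        # "\r\n" collapses to "\n"; a lone "\r" (old Mac) becomes "\n".
        out = []
        skip_next_lf = False
        for c in s:
            if skip_next_lf:
                skip_next_lf = False
                if c == '\n':
                    continue        # LF of a CRLF pair; "\n" already emitted
            if c == '\r':
                skip_next_lf = True
                c = '\n'
            out.append(c)
        # Exec input additionally gets a guaranteed trailing newline.
        if exec_input and (not out or out[-1] != '\n'):
            out.append('\n')
        return ''.join(out)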
@@ -52,9 +52,10 @@ struct tok_state {
 #endif
     const char* enc;
     const char* str;
+    const char* input; /* Tokenizer's newline translated copy of the string. */
 };
 
-extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
...