Commit aa6ce6f2 authored by Benjamin Peterson

Merged revisions 76230 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines

  fix several compile() issues by translating newlines in the tokenizer
........
parent 7f3c8d86
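
The user-visible effect is easiest to see from Python. A minimal sketch (the "<demo>" filename is made up); on 3.2 and later each of these compiles, where previously compile() required '\n' line endings and, in 'exec' mode, a terminating newline:

    compile("x = 1\r\ny = 2\r\n", "<demo>", "exec")   # Windows line endings
    compile("x = 1\rprint(x)\r", "<demo>", "exec")    # old Mac line endings
    compile("x = 1", "<demo>", "exec")                # no trailing newline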
Doc/library/functions.rst

@@ -176,11 +176,15 @@ are always available.  They are listed here in alphabetical order.
 
    .. note::
 
-      When compiling a string with multi-line statements, line endings must be
-      represented by a single newline character (``'\n'``), and the input must
-      be terminated by at least one newline character.  If line endings are
-      represented by ``'\r\n'``, use :meth:`str.replace` to change them into
-      ``'\n'``.
+      When compiling a string with multi-line statements in ``'single'`` or
+      ``'eval'`` mode, input must be terminated by at least one newline
+      character.  This is to facilitate detection of incomplete and complete
+      statements in the :mod:`code` module.
+
+   .. versionchanged:: 3.2
+      Allowed use of Windows and Mac newlines.  Also input in ``'exec'`` mode
+      does not have to end in a newline anymore.
 
 .. function:: complex([real[, imag]])
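
A hedged illustration of the documented rule (the "<demo>" name is hypothetical): in 'single' mode, a multi-line statement still needs its terminating newline so the code module can tell complete statements from ones the user is still typing:

    compile("if True:\n    x = 1\n", "<demo>", "single")       # complete
    try:
        compile("if True:\n    x = 1", "<demo>", "single")     # looks incomplete
    except SyntaxError:
        pass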
Lib/test/test_codeop.py

@@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase):
         self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
                              compile("a = 1\n", "def", 'single').co_filename)
 
-    def test_no_universal_newlines(self):
-        code = compile_command("'\rfoo\r'", symbol='eval')
-        self.assertEqual(eval(code), '\rfoo\r')
-
 def test_main():
     run_unittest(CodeopTests)
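
The deleted test reflects a behavior change, not just cleanup: because translation now happens in the tokenizer, before string literals are parsed, a raw CR byte inside a literal is converted too. A sketch of my reading of the new behavior ("<demo>" is hypothetical):

    code = compile("'\rfoo\r'", "<demo>", "eval")
    assert eval(code) == "\nfoo\n"   # the CRs in the source became LFs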
Lib/test/test_compile.py

@@ -5,6 +5,19 @@ from test import support
 
 class TestSpecifics(unittest.TestCase):
 
+    def test_no_ending_newline(self):
+        compile("hi", "<test>", "exec")
+        compile("hi\r", "<test>", "exec")
+
+    def test_empty(self):
+        compile("", "<test>", "exec")
+
+    def test_other_newlines(self):
+        compile("\r\n", "<test>", "exec")
+        compile("\r", "<test>", "exec")
+        compile("hi\r\nstuff\r\ndef f():\n pass\r", "<test>", "exec")
+        compile("this_is\rreally_old_mac\rdef f():\n pass", "<test>", "exec")
+
     def test_debug_assignment(self):
         # catch assignments to __debug__
         self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
Lib/test/test_parser.py

@@ -237,9 +237,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):
                           (14, '+', 2, 13),
                           (2, '1', 2, 15),
                           (4, '', 2, 16),
-                          (6, '', 2, -1),
-                          (4, '', 2, -1),
-                          (0, '', 2, -1)],
+                          (6, '', 3, -1),
+                          (4, '', 3, -1),
+                          (0, '', 3, -1)],
                          terminals)
 
     def test_extended_unpacking(self):
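
The expected line numbers move from 2 to 3 because exec input now always ends in a newline: the appended '\n' terminates line 2, so the closing DEDENT/NEWLINE/ENDMARKER terminals are reported on line 3. A sketch with the parser module (assuming 3.2 behavior):

    import parser
    st = parser.suite("if 1:\n    x += 1")           # no trailing newline given
    tup = parser.st2tuple(st, line_info=True)        # closing terminals land on the extra line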
Lib/test/test_pep263.py

@@ -26,7 +26,7 @@ class PEP263Test(unittest.TestCase):
         try:
             compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
         except SyntaxError as v:
-            self.assertEquals(v.text, "print '\u5e74'")
+            self.assertEquals(v.text, "print '\u5e74'\n")
         else:
             self.fail()
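
The updated expectation follows from the exec-mode rule: the tokenizer appends a newline to exec input, so the source line echoed in SyntaxError.text now carries a trailing '\n'. A hedged sketch of the same effect ("<demo>" is hypothetical):

    try:
        compile("print 'this is Python 2 syntax'", "<demo>", "exec")
    except SyntaxError as e:
        assert e.text.endswith("\n")   # newline appended by the tokenizer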
Parser/parsetok.c

@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                                     perrdetail *err_ret, int *flags)
 {
     struct tok_state *tok;
+    int exec_input = start == file_input;
 
     initerr(err_ret, filename);
 
     if (*flags & PyPARSE_IGNORE_COOKIE)
-        tok = PyTokenizer_FromUTF8(s);
+        tok = PyTokenizer_FromUTF8(s, exec_input);
     else
-        tok = PyTokenizer_FromString(s);
+        tok = PyTokenizer_FromString(s, exec_input);
     if (tok == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
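
exec_input is true only when the grammar start symbol is file_input, i.e. compile() in 'exec' mode; 'eval' and 'single' map to the eval_input and single_input start symbols and keep the old termination requirement. A quick Python-side check of that asymmetry (assumes 3.2 behavior; "<demo>" is made up):

    import ast
    ast.parse("x = 1")                   # file_input: trailing newline appended
    compile("1 + 1", "<demo>", "eval")   # eval_input: unaffected by exec_input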
Parser/tokenizer.c

@@ -119,6 +119,7 @@ tok_new(void)
     tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
+    tok->input = NULL;
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
     return tok;
 }
 
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+    char* result = (char *)PyMem_MALLOC(len + 1);
+    if (result != NULL) {
+        memcpy(result, s, len);
+        result[len] = '\0';
+    }
+    return result;
+}
+
 #ifdef PGEN
 
 static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
     return feof(tok->fp);
 }
 
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return str;
+    return new_string(str, strlen(str));
 }
 
 #else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
     return NULL;                /* as if it were EOF */
 }
 
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
-    }
-    return result;
-}
 
 static char *
 get_normal_name(char *s)        /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
     return utf8;
 }
 
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+    int skip_next_lf = 0, length = strlen(s), final_length;
+    char *buf, *current;
+    char c;
+    buf = PyMem_MALLOC(length + 2);
+    if (buf == NULL) {
+        tok->done = E_NOMEM;
+        return NULL;
+    }
+    for (current = buf; (c = *s++);) {
+        if (skip_next_lf) {
+            skip_next_lf = 0;
+            if (c == '\n') {
+                c = *s;
+                s++;
+                if (!c)
+                    break;
+            }
+        }
+        if (c == '\r') {
+            skip_next_lf = 1;
+            c = '\n';
+        }
+        *current = c;
+        current++;
+    }
+    /* If this is exec input, add a newline to the end of the file if
+       there isn't one already. */
+    if (exec_input && *current != '\n') {
+        *current = '\n';
+        current++;
+    }
+    *current = '\0';
+    final_length = current - buf;
+    if (final_length < length && final_length)
+        /* should never fail */
+        buf = PyMem_REALLOC(buf, final_length + 1);
+    return buf;
+}
+
 /* Decode a byte string STR for use as the buffer of TOK.
    Look for encoding declarations inside STR, and record them
    inside TOK. */
 
 static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
+    const char *str;
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;
+    tok->input = str = translate_newlines(input, single, tok);
+    if (str == NULL)
+        return NULL;
     tok->enc = NULL;
     tok->str = str;
     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
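
To make the CR/CRLF handling above easier to follow, here is a rough Python rendering of what translate_newlines is meant to compute (a sketch of the intended behavior, not code from this change; the final-newline check here looks at the last character appended):

    def translate_newlines(s: str, exec_input: bool) -> str:
        buf = []
        skip_next_lf = False
        for c in s:
            if skip_next_lf:               # previous char was '\r'
                skip_next_lf = False
                if c == "\n":              # swallow the LF of a CRLF pair
                    continue
            if c == "\r":                  # lone CR or start of CRLF
                skip_next_lf = True
                c = "\n"
            buf.append(c)
        if exec_input and (not buf or buf[-1] != "\n"):
            buf.append("\n")               # guarantee a terminating newline
        return "".join(buf)

    assert translate_newlines("a\r\nb\rc", True) == "a\nb\nc\n"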
@@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
 /* Set up tokenizer for string */
 
 struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-    str = (char *)decode_str(str, tok);
+    str = (char *)decode_str(str, exec_input, tok);
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
@@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
 }
 
 struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
+#ifndef PGEN
+    tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+    if (str == NULL) {
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
     tok->decoding_state = STATE_RAW;
     tok->read_coding_spec = 1;
     tok->enc = NULL;
@@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
     return tok;
 }
 
-
 /* Set up tokenizer for file */
 
 struct tok_state *
@@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
+    if (tok->input)
+        PyMem_FREE((char *)tok->input);
     PyMem_FREE(tok);
 }
Parser/tokenizer.h

@@ -58,10 +58,11 @@ struct tok_state {
 #endif
     const char* enc;            /* Encoding for the current str. */
     const char* str;
+    const char* input;          /* Tokenizer's newline translated copy of the string. */
 };
 
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                               char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);