Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
e36199b4
Commit
e36199b4
authored
Nov 12, 2009
by
Benjamin Peterson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix several compile() issues by translating newlines in the tokenizer
parent
c4cd6d37
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
96 additions
and
30 deletions
+96
-30
Doc/library/functions.rst
Doc/library/functions.rst
+8
-5
Lib/test/test_codeop.py
Lib/test/test_codeop.py
+0
-4
Lib/test/test_compile.py
Lib/test/test_compile.py
+13
-0
Lib/test/test_parser.py
Lib/test/test_parser.py
+3
-3
Misc/NEWS
Misc/NEWS
+3
-0
Parser/parsetok.c
Parser/parsetok.c
+1
-1
Parser/tokenizer.c
Parser/tokenizer.c
+66
-16
Parser/tokenizer.h
Parser/tokenizer.h
+2
-1
No files found.
Doc/library/functions.rst
View file @
e36199b4
...
...
@@ -173,11 +173,10 @@ available. They are listed here in alphabetical order.
.. note::
When compiling a string with multi-line statements, line endings must be
represented by a single newline character (``'\n'``), and the input must
be terminated by at least one newline character. If line endings are
represented by ``'\r\n'``, use :meth:`str.replace` to change them into
``'\n'``.
When compiling a string with multi-line statements in ``'single'`` or
``'eval'`` mode, input must be terminated by at least one newline
character. This is to facilitate detection of incomplete and complete
statements in the :mod:`code` module.
.. versionchanged:: 2.3
The *flags* and *dont_inherit* arguments were added.
...
...
@@ -185,6 +184,10 @@ available. They are listed here in alphabetical order.
.. versionchanged:: 2.6
Support for compiling AST objects.
.. versionchanged:: 2.7
Allowed use of Windows and Mac newlines. Also, input in ``'exec'`` mode
no longer has to end in a newline.
.. function:: complex([real[, imag]])
...
...
Lib/test/test_codeop.py
View file @
e36199b4
...
...
@@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase):
self
.
assertNotEquals
(
compile_command
(
"a = 1
\
n
"
,
"abc"
).
co_filename
,
compile
(
"a = 1
\
n
"
,
"def"
,
'single'
).
co_filename
)
def
test_no_universal_newlines
(
self
):
code
=
compile_command
(
"'
\
r
foo
\
r
'"
,
symbol
=
'eval'
)
self
.
assertEqual
(
eval
(
code
),
'
\
r
foo
\
r
'
)
def
test_main
():
run_unittest
(
CodeopTests
)
...
...
Lib/test/test_compile.py
View file @
e36199b4
...
...
@@ -5,6 +5,19 @@ from test import test_support
class
TestSpecifics
(
unittest
.
TestCase
):
def
test_no_ending_newline
(
self
):
compile
(
"hi"
,
"<test>"
,
"exec"
)
compile
(
"hi
\
r
"
,
"<test>"
,
"exec"
)
def
test_empty
(
self
):
compile
(
""
,
"<test>"
,
"exec"
)
def
test_other_newlines
(
self
):
compile
(
"
\
r
\
n
"
,
"<test>"
,
"exec"
)
compile
(
"
\
r
"
,
"<test>"
,
"exec"
)
compile
(
"hi
\
r
\
n
stuff
\
r
\
n
def f():
\
n
pass
\
r
"
,
"<test>"
,
"exec"
)
compile
(
"this_is
\
r
really_old_mac
\
r
def f():
\
n
pass"
,
"<test>"
,
"exec"
)
def
test_debug_assignment
(
self
):
# catch assignments to __debug__
self
.
assertRaises
(
SyntaxError
,
compile
,
'__debug__ = 1'
,
'?'
,
'single'
)
...
...
Lib/test/test_parser.py
View file @
e36199b4
...
...
@@ -243,9 +243,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):
(
14
,
'+'
,
2
,
13
),
(
2
,
'1'
,
2
,
15
),
(
4
,
''
,
2
,
16
),
(
6
,
''
,
2
,
-
1
),
(
4
,
''
,
2
,
-
1
),
(
0
,
''
,
2
,
-
1
)],
(
6
,
''
,
3
,
-
1
),
(
4
,
''
,
3
,
-
1
),
(
0
,
''
,
3
,
-
1
)],
terminals
)
...
...
Misc/NEWS
View file @
e36199b4
...
...
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
- Fix several issues with compile(). The input can now contain Windows and Mac
newlines and is no longer required to end in a newline.
- Remove length limitation when constructing a complex number from a
unicode string.
...
...
Parser/parsetok.c
View file @
e36199b4
...
...
@@ -51,7 +51,7 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
initerr
(
err_ret
,
filename
);
if
((
tok
=
PyTokenizer_FromString
(
s
))
==
NULL
)
{
if
((
tok
=
PyTokenizer_FromString
(
s
,
start
==
file_input
))
==
NULL
)
{
err_ret
->
error
=
PyErr_Occurred
()
?
E_DECODE
:
E_NOMEM
;
return
NULL
;
}
...
...
Parser/tokenizer.c
View file @
e36199b4
...
...
@@ -105,6 +105,7 @@ tok_new(void)
tok
->
buf
=
tok
->
cur
=
tok
->
end
=
tok
->
inp
=
tok
->
start
=
NULL
;
tok
->
done
=
E_OK
;
tok
->
fp
=
NULL
;
tok
->
input
=
NULL
;
tok
->
tabsize
=
TABSIZE
;
tok
->
indent
=
0
;
tok
->
indstack
[
0
]
=
0
;
...
...
@@ -130,6 +131,17 @@ tok_new(void)
return
tok
;
}
/* Return a freshly allocated, NUL-terminated copy of the first LEN bytes
   of S.  The memory comes from PyMem_MALLOC; NULL is returned when that
   allocation fails. */
static char *
new_string(const char *s, Py_ssize_t len)
{
    char *copy = (char *)PyMem_MALLOC(len + 1);

    if (copy == NULL)
        return NULL;
    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}
#ifdef PGEN
static
char
*
...
...
@@ -144,10 +156,10 @@ decoding_feof(struct tok_state *tok)
return
feof
(
tok
->
fp
);
}
static
c
onst
c
har
*
decode_str
(
const
char
*
str
,
struct
tok_state
*
tok
)
static
char
*
decode_str
(
const
char
*
str
,
int
exec_input
,
struct
tok_state
*
tok
)
{
return
str
;
return
new_string
(
str
,
strlen
(
str
))
;
}
#else
/* PGEN */
...
...
@@ -162,16 +174,6 @@ error_ret(struct tok_state *tok) /* XXX */
return
NULL
;
/* as if it were EOF */
}
/* Return a freshly allocated, NUL-terminated copy of the first LEN bytes
   of S.  The memory comes from PyMem_MALLOC; NULL is returned when that
   allocation fails. */
static char *
new_string(const char *s, Py_ssize_t len)
{
    char *copy = (char *)PyMem_MALLOC(len + 1);

    if (copy == NULL)
        return NULL;
    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}
static
char
*
get_normal_name
(
char
*
s
)
/* for utf-8 and latin-1 */
...
...
@@ -586,17 +588,63 @@ translate_into_utf8(const char* str, const char* enc) {
}
#endif
/* Return a newly allocated copy of S in which every "\r\n" pair and every
   lone '\r' has been replaced by a single '\n'.  When EXEC_INPUT is true
   the copy is additionally guaranteed to end in '\n' (an empty input
   becomes "\n").

   The buffer is obtained from PyMem_MALLOC.  On allocation failure
   TOK->done is set to E_NOMEM and NULL is returned. */
static char *
translate_newlines(const char *s, int exec_input, struct tok_state *tok)
{
    /* Worst case nothing is collapsed and one newline is appended, so the
       output needs strlen(s) + 1 bytes plus the terminating NUL.  Use
       size_t so long inputs are not truncated to int. */
    size_t needed_length = strlen(s) + 2;
    size_t final_length;
    int skip_next_lf = 0;
    char *buf, *current;
    char c;

    buf = (char *)PyMem_MALLOC(needed_length);
    if (buf == NULL) {
        tok->done = E_NOMEM;
        return NULL;
    }
    for (current = buf; (c = *s) != '\0'; s++) {
        if (skip_next_lf) {
            skip_next_lf = 0;
            /* The '\r' of this "\r\n" pair has already produced a '\n'. */
            if (c == '\n')
                continue;
        }
        if (c == '\r') {
            skip_next_lf = 1;
            c = '\n';
        }
        *current++ = c;
    }
    /* If this is exec input, add a newline to the end of the string if
       there isn't one already.  Inspect the last byte actually written:
       *current itself has not been stored to yet, so reading it would
       yield an indeterminate value. */
    if (exec_input && (current == buf || current[-1] != '\n'))
        *current++ = '\n';
    *current = '\0';
    final_length = (size_t)(current - buf) + 1;    /* includes the NUL */
    if (final_length < needed_length) {
        /* Shrinking should never fail, but if it does the original
           (larger) buffer is still valid, so keep using it instead of
           leaking it and returning NULL. */
        char *shrunk = (char *)PyMem_REALLOC(buf, final_length);
        if (shrunk != NULL)
            buf = shrunk;
    }
    return buf;
}
/* Decode a byte string STR for use as the buffer of TOK.
Look for encoding declarations inside STR, and record them
inside TOK. */
static
const
char
*
decode_str
(
const
char
*
str
,
struct
tok_state
*
tok
)
decode_str
(
const
char
*
input
,
int
single
,
struct
tok_state
*
tok
)
{
PyObject
*
utf8
=
NULL
;
const
char
*
str
;
const
char
*
s
;
const
char
*
newl
[
2
]
=
{
NULL
,
NULL
};
int
lineno
=
0
;
tok
->
input
=
str
=
translate_newlines
(
input
,
single
,
tok
);
if
(
str
==
NULL
)
return
NULL
;
tok
->
enc
=
NULL
;
tok
->
str
=
str
;
if
(
!
check_bom
(
buf_getc
,
buf_ungetc
,
buf_setreadl
,
tok
))
...
...
@@ -651,12 +699,12 @@ decode_str(const char *str, struct tok_state *tok)
/* Set up tokenizer for string */
struct
tok_state
*
PyTokenizer_FromString
(
const
char
*
str
)
PyTokenizer_FromString
(
const
char
*
str
,
int
exec_input
)
{
struct
tok_state
*
tok
=
tok_new
();
if
(
tok
==
NULL
)
return
NULL
;
str
=
(
char
*
)
decode_str
(
str
,
tok
);
str
=
(
char
*
)
decode_str
(
str
,
exec_input
,
tok
);
if
(
str
==
NULL
)
{
PyTokenizer_Free
(
tok
);
return
NULL
;
...
...
@@ -702,6 +750,8 @@ PyTokenizer_Free(struct tok_state *tok)
#endif
if
(
tok
->
fp
!=
NULL
&&
tok
->
buf
!=
NULL
)
PyMem_FREE
(
tok
->
buf
);
if
(
tok
->
input
)
PyMem_FREE
((
char
*
)
tok
->
input
);
PyMem_FREE
(
tok
);
}
...
...
Parser/tokenizer.h
View file @
e36199b4
...
...
@@ -52,9 +52,10 @@ struct tok_state {
#endif
const
char
*
enc
;
const
char
*
str
;
const
char
*
input
;
/* Tokenizer's newline translated copy of the string. */
};
extern
struct
tok_state
*
PyTokenizer_FromString
(
const
char
*
);
extern
struct
tok_state
*
PyTokenizer_FromString
(
const
char
*
,
int
);
extern
struct
tok_state
*
PyTokenizer_FromFile
(
FILE
*
,
char
*
,
char
*
);
extern
void
PyTokenizer_Free
(
struct
tok_state
*
);
extern
int
PyTokenizer_Get
(
struct
tok_state
*
,
char
**
,
char
**
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment