Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
7f2fee36
Commit
7f2fee36
authored
Apr 05, 2011
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #10785: Store the filename as Unicode in the Python parser.
parent
9bdb43e4
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
94 additions
and
42 deletions
+94
-42
Include/parsetok.h
Include/parsetok.h
+7
-2
Makefile.pre.in
Makefile.pre.in
+4
-3
Misc/NEWS
Misc/NEWS
+2
-0
Modules/parsermodule.c
Modules/parsermodule.c
+1
-0
Parser/parsetok.c
Parser/parsetok.c
+25
-7
Parser/parsetok_pgen.c
Parser/parsetok_pgen.c
+2
-0
Parser/tokenizer.c
Parser/tokenizer.c
+22
-13
Parser/tokenizer.h
Parser/tokenizer.h
+7
-1
Python/pythonrun.c
Python/pythonrun.c
+24
-16
No files found.
Include/parsetok.h
View file @
7f2fee36
...
...
@@ -9,7 +9,10 @@ extern "C" {
typedef
struct
{
int
error
;
const
char
*
filename
;
/* decoded from the filesystem encoding */
#ifndef PGEN
/* The filename is useless for pgen, see comment in tok_state structure */
PyObject
*
filename
;
#endif
int
lineno
;
int
offset
;
char
*
text
;
/* UTF-8-encoded string */
...
...
@@ -66,8 +69,10 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(
perrdetail
*
err_ret
,
int
*
flags
);
/* Note that he following function is defined in pythonrun.c not parsetok.c. */
/* Note that the following functions are defined in pythonrun.c,
not in parsetok.c */
PyAPI_FUNC
(
void
)
PyParser_SetError
(
perrdetail
*
);
PyAPI_FUNC
(
void
)
PyParser_ClearError
(
perrdetail
*
);
#ifdef __cplusplus
}
...
...
Makefile.pre.in
View file @
7f2fee36
...
...
@@ -238,14 +238,13 @@ POBJS= \
Parser/listnode.o
\
Parser/node.o
\
Parser/parser.o
\
Parser/parsetok.o
\
Parser/bitset.o
\
Parser/metagrammar.o
\
Parser/firstsets.o
\
Parser/grammar.o
\
Parser/pgen.o
PARSER_OBJS
=
$(POBJS)
Parser/myreadline.o Parser/tokenizer.o
PARSER_OBJS
=
$(POBJS)
Parser/myreadline.o Parser/
parsetok.o Parser/
tokenizer.o
PGOBJS
=
\
Objects/obmalloc.o
\
...
...
@@ -254,10 +253,12 @@ PGOBJS= \
Python/pyctype.o
\
Parser/tokenizer_pgen.o
\
Parser/printgrammar.o
\
Parser/parsetok_pgen.o
\
Parser/pgenmain.o
PARSER_HEADERS
=
\
Parser/parser.h
\
Include/parsetok.h
\
Parser/tokenizer.h
PGENOBJS
=
$(PGENMAIN)
$(POBJS)
$(PGOBJS)
...
...
@@ -593,6 +594,7 @@ Parser/grammar.o: $(srcdir)/Parser/grammar.c \
Parser/metagrammar.o
:
$(srcdir)/Parser/metagrammar.c
Parser/tokenizer_pgen.o
:
$(srcdir)/Parser/tokenizer.c
Parser/parsetok_pgen.o
:
$(srcdir)/Parser/parsetok.c
Parser/pgenmain.o
:
$(srcdir)/Include/parsetok.h
...
...
@@ -700,7 +702,6 @@ PYTHON_HEADERS= \
Include/objimpl.h
\
Include/opcode.h
\
Include/osdefs.h
\
Include/parsetok.h
\
Include/patchlevel.h
\
Include/pgen.h
\
Include/pgenheaders.h
\
...
...
Misc/NEWS
View file @
7f2fee36
...
...
@@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #10785: Store the filename as Unicode in the Python parser.
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes on Windows.
...
...
Modules/parsermodule.c
View file @
7f2fee36
...
...
@@ -584,6 +584,7 @@ parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
else
PyParser_SetError
(
&
err
);
}
PyParser_ClearError
(
&
err
);
return
(
res
);
}
...
...
Parser/parsetok.c
View file @
7f2fee36
...
...
@@ -13,7 +13,7 @@
/* Forward */
static
node
*
parsetok
(
struct
tok_state
*
,
grammar
*
,
int
,
perrdetail
*
,
int
*
);
static
void
initerr
(
perrdetail
*
err_ret
,
const
char
*
filename
);
static
int
initerr
(
perrdetail
*
err_ret
,
const
char
*
filename
);
/* Parse input coming from a string. Return error code, print some errors. */
node
*
...
...
@@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
struct
tok_state
*
tok
;
int
exec_input
=
start
==
file_input
;
initerr
(
err_ret
,
filename
);
if
(
initerr
(
err_ret
,
filename
)
<
0
)
return
NULL
;
if
(
*
flags
&
PyPARSE_IGNORE_COOKIE
)
tok
=
PyTokenizer_FromUTF8
(
s
,
exec_input
);
...
...
@@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
return
NULL
;
}
tok
->
filename
=
filename
?
filename
:
"<string>"
;
#ifndef PGEN
Py_INCREF
(
err_ret
->
filename
);
tok
->
filename
=
err_ret
->
filename
;
#endif
return
parsetok
(
tok
,
g
,
start
,
err_ret
,
flags
);
}
...
...
@@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
{
struct
tok_state
*
tok
;
initerr
(
err_ret
,
filename
);
if
(
initerr
(
err_ret
,
filename
)
<
0
)
return
NULL
;
if
((
tok
=
PyTokenizer_FromFile
(
fp
,
(
char
*
)
enc
,
ps1
,
ps2
))
==
NULL
)
{
err_ret
->
error
=
E_NOMEM
;
return
NULL
;
}
tok
->
filename
=
filename
;
#ifndef PGEN
Py_INCREF
(
err_ret
->
filename
);
tok
->
filename
=
err_ret
->
filename
;
#endif
return
parsetok
(
tok
,
g
,
start
,
err_ret
,
flags
);
}
...
...
@@ -267,14 +275,24 @@ done:
return
n
;
}
static
void
static
int
initerr
(
perrdetail
*
err_ret
,
const
char
*
filename
)
{
err_ret
->
error
=
E_OK
;
err_ret
->
filename
=
filename
;
err_ret
->
lineno
=
0
;
err_ret
->
offset
=
0
;
err_ret
->
text
=
NULL
;
err_ret
->
token
=
-
1
;
err_ret
->
expected
=
-
1
;
#ifndef PGEN
if
(
filename
)
err_ret
->
filename
=
PyUnicode_DecodeFSDefault
(
filename
);
else
err_ret
->
filename
=
PyUnicode_FromString
(
"<string>"
);
if
(
err_ret
->
filename
==
NULL
)
{
err_ret
->
error
=
E_ERROR
;
return
-
1
;
}
#endif
return
0
;
}
Parser/parsetok_pgen.c
0 → 100644
View file @
7f2fee36
#define PGEN
#include "parsetok.c"
Parser/tokenizer.c
View file @
7f2fee36
...
...
@@ -128,7 +128,6 @@ tok_new(void)
tok
->
prompt
=
tok
->
nextprompt
=
NULL
;
tok
->
lineno
=
0
;
tok
->
level
=
0
;
tok
->
filename
=
NULL
;
tok
->
altwarning
=
1
;
tok
->
alterror
=
1
;
tok
->
alttabsize
=
1
;
...
...
@@ -140,6 +139,7 @@ tok_new(void)
tok
->
encoding
=
NULL
;
tok
->
cont_line
=
0
;
#ifndef PGEN
tok
->
filename
=
NULL
;
tok
->
decoding_readline
=
NULL
;
tok
->
decoding_buffer
=
NULL
;
#endif
...
...
@@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{
char
*
line
=
NULL
;
int
badchar
=
0
;
PyObject
*
filename
;
for
(;;)
{
if
(
tok
->
decoding_state
==
STATE_NORMAL
)
{
/* We already have a codec associated with
...
...
@@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
if
(
badchar
)
{
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
filename
=
PyUnicode_DecodeFSDefault
(
tok
->
filename
);
if
(
filename
!=
NULL
)
{
PyErr_Format
(
PyExc_SyntaxError
,
"Non-UTF-8 code starting with '
\\
x%.2x' "
"in file %U on line %i, "
"but no encoding declared; "
"see http://python.org/dev/peps/pep-0263/ for details"
,
badchar
,
filename
,
tok
->
lineno
+
1
);
Py_DECREF
(
filename
);
}
PyErr_Format
(
PyExc_SyntaxError
,
"Non-UTF-8 code starting with '
\\
x%.2x' "
"in file %U on line %i, "
"but no encoding declared; "
"see http://python.org/dev/peps/pep-0263/ for details"
,
badchar
,
tok
->
filename
,
tok
->
lineno
+
1
);
return
error_ret
(
tok
);
}
#endif
...
...
@@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok)
#ifndef PGEN
Py_XDECREF
(
tok
->
decoding_readline
);
Py_XDECREF
(
tok
->
decoding_buffer
);
Py_XDECREF
(
tok
->
filename
);
#endif
if
(
tok
->
fp
!=
NULL
&&
tok
->
buf
!=
NULL
)
PyMem_FREE
(
tok
->
buf
);
...
...
@@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok)
return
1
;
}
if
(
tok
->
altwarning
)
{
PySys_WriteStderr
(
"%s: inconsistent use of tabs and spaces "
#ifdef PGEN
PySys_WriteStderr
(
"inconsistent use of tabs and spaces "
"in indentation
\n
"
);
#else
PySys_FormatStderr
(
"%U: inconsistent use of tabs and spaces "
"in indentation
\n
"
,
tok
->
filename
);
#endif
tok
->
altwarning
=
0
;
}
return
0
;
...
...
@@ -1718,6 +1719,11 @@ PyTokenizer_FindEncoding(int fd)
fclose
(
fp
);
return
NULL
;
}
#ifndef PGEN
tok
->
filename
=
PyUnicode_FromString
(
"<string>"
);
if
(
tok
->
filename
==
NULL
)
goto
error
;
#endif
while
(
tok
->
lineno
<
2
&&
tok
->
done
==
E_OK
)
{
PyTokenizer_Get
(
tok
,
&
p_start
,
&
p_end
);
}
...
...
@@ -1727,6 +1733,9 @@ PyTokenizer_FindEncoding(int fd)
if
(
encoding
)
strcpy
(
encoding
,
tok
->
encoding
);
}
#ifndef PGEN
error:
#endif
PyTokenizer_Free
(
tok
);
return
encoding
;
}
...
...
Parser/tokenizer.h
View file @
7f2fee36
...
...
@@ -40,7 +40,13 @@ struct tok_state {
int
level
;
/* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
const
char
*
filename
;
/* encoded to the filesystem encoding */
#ifndef PGEN
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject
*
filename
;
#endif
int
altwarning
;
/* Issue warning if alternate tabs don't match */
int
alterror
;
/* Issue error if alternate tabs don't match */
int
alttabsize
;
/* Alternate tab spacing */
...
...
Python/pythonrun.c
View file @
7f2fee36
...
...
@@ -62,6 +62,7 @@ static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
static
PyObject
*
run_pyc_file
(
FILE
*
,
const
char
*
,
PyObject
*
,
PyObject
*
,
PyCompilerFlags
*
);
static
void
err_input
(
perrdetail
*
);
static
void
err_free
(
perrdetail
*
);
static
void
initsigs
(
void
);
static
void
call_py_exitfuncs
(
void
);
static
void
wait_for_thread_shutdown
(
void
);
...
...
@@ -1887,12 +1888,13 @@ PyParser_ASTFromString(const char *s, const char *filename, int start,
flags
->
cf_flags
|=
iflags
&
PyCF_MASK
;
mod
=
PyAST_FromNode
(
n
,
flags
,
filename
,
arena
);
PyNode_Free
(
n
);
return
mod
;
}
else
{
err_input
(
&
err
);
return
NULL
;
mod
=
NULL
;
}
err_free
(
&
err
);
return
mod
;
}
mod_ty
...
...
@@ -1917,14 +1919,15 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc,
flags
->
cf_flags
|=
iflags
&
PyCF_MASK
;
mod
=
PyAST_FromNode
(
n
,
flags
,
filename
,
arena
);
PyNode_Free
(
n
);
return
mod
;
}
else
{
err_input
(
&
err
);
if
(
errcode
)
*
errcode
=
err
.
error
;
return
NULL
;
mod
=
NULL
;
}
err_free
(
&
err
);
return
mod
;
}
/* Simplified interface to parsefile -- return node or set exception */
...
...
@@ -1938,6 +1941,7 @@ PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int fla
start
,
NULL
,
NULL
,
&
err
,
flags
);
if
(
n
==
NULL
)
err_input
(
&
err
);
err_free
(
&
err
);
return
n
;
}
...
...
@@ -1952,6 +1956,7 @@ PyParser_SimpleParseStringFlags(const char *str, int start, int flags)
start
,
&
err
,
flags
);
if
(
n
==
NULL
)
err_input
(
&
err
);
err_free
(
&
err
);
return
n
;
}
...
...
@@ -1964,6 +1969,7 @@ PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
&
_PyParser_Grammar
,
start
,
&
err
,
flags
);
if
(
n
==
NULL
)
err_input
(
&
err
);
err_free
(
&
err
);
return
n
;
}
...
...
@@ -1976,12 +1982,24 @@ PyParser_SimpleParseStringFilename(const char *str, const char *filename, int st
/* May want to move a more generalized form of this to parsetok.c or
even parser modules. */
void
PyParser_ClearError
(
perrdetail
*
err
)
{
err_free
(
err
);
}
void
PyParser_SetError
(
perrdetail
*
err
)
{
err_input
(
err
);
}
static
void
err_free
(
perrdetail
*
err
)
{
Py_CLEAR
(
err
->
filename
);
}
/* Set the error appropriate to the given input error code (see errcode.h) */
static
void
...
...
@@ -1989,7 +2007,6 @@ err_input(perrdetail *err)
{
PyObject
*
v
,
*
w
,
*
errtype
,
*
errtext
;
PyObject
*
msg_obj
=
NULL
;
PyObject
*
filename
;
char
*
msg
=
NULL
;
errtype
=
PyExc_SyntaxError
;
...
...
@@ -2075,17 +2092,8 @@ err_input(perrdetail *err)
errtext
=
PyUnicode_DecodeUTF8
(
err
->
text
,
strlen
(
err
->
text
),
"replace"
);
}
if
(
err
->
filename
!=
NULL
)
filename
=
PyUnicode_DecodeFSDefault
(
err
->
filename
);
else
{
Py_INCREF
(
Py_None
);
filename
=
Py_None
;
}
if
(
filename
!=
NULL
)
v
=
Py_BuildValue
(
"(NiiN)"
,
filename
,
err
->
lineno
,
err
->
offset
,
errtext
);
else
v
=
NULL
;
v
=
Py_BuildValue
(
"(OiiN)"
,
err
->
filename
,
err
->
lineno
,
err
->
offset
,
errtext
);
if
(
v
!=
NULL
)
{
if
(
msg_obj
)
w
=
Py_BuildValue
(
"(OO)"
,
msg_obj
,
v
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment