Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
fe7c5b5b
Commit
fe7c5b5b
authored
Apr 05, 2011
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
parent
7f2fee36
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
43 additions
and
23 deletions
+43
-23
Lib/test/test_imp.py
Lib/test/test_imp.py
+6
-0
Misc/NEWS
Misc/NEWS
+2
-0
Parser/tokenizer.c
Parser/tokenizer.c
+27
-14
Parser/tokenizer.h
Parser/tokenizer.h
+0
-1
Python/import.c
Python/import.c
+5
-5
Python/traceback.c
Python/traceback.c
+3
-3
No files found.
Lib/test/test_imp.py
View file @
fe7c5b5b
...
...
@@ -58,6 +58,12 @@ class ImportTests(unittest.TestCase):
with
imp
.
find_module
(
'module_'
+
mod
,
self
.
test_path
)[
0
]
as
fd
:
self
.
assertEqual
(
fd
.
encoding
,
encoding
)
path
=
[
os
.
path
.
dirname
(
__file__
)]
self
.
assertRaisesRegex
(
SyntaxError
,
r"Non-UTF-8 code starting with '\\xf6'"
r" in file .*badsyntax_pep3120.py"
,
imp
.
find_module
,
'badsyntax_pep3120'
,
path
)
def
test_issue1267
(
self
):
for
mod
,
encoding
,
_
in
self
.
test_strings
:
fp
,
filename
,
info
=
imp
.
find_module
(
'module_'
+
mod
,
...
...
Misc/NEWS
View file @
fe7c5b5b
...
...
@@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
- Issue #10785: Store the filename as Unicode in the Python parser.
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
...
...
Parser/tokenizer.c
View file @
fe7c5b5b
...
...
@@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return
result
;
}
/* Get -*- encoding -*- from a Python file.
/* Get the encoding of a Python file. Check for the coding cookie and check if
the file starts with a BOM.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
the first or second line of the file (in which case the encoding
should be assumed to be PyUnicode_GetDefaultEncoding()).
PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
encoding in the first or second line of the file (in which case the encoding
should be assumed to be UTF-8).
The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
by the caller. */
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
by the caller.
*/
char
*
PyTokenizer_FindEncoding
(
int
fd
)
PyTokenizer_FindEncoding
Filename
(
int
fd
,
PyObject
*
filename
)
{
struct
tok_state
*
tok
;
FILE
*
fp
;
...
...
@@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
return
NULL
;
}
#ifndef PGEN
if
(
filename
!=
NULL
)
{
Py_INCREF
(
filename
);
tok
->
filename
=
filename
;
}
else
{
tok
->
filename
=
PyUnicode_FromString
(
"<string>"
);
if
(
tok
->
filename
==
NULL
)
goto
error
;
if
(
tok
->
filename
==
NULL
)
{
fclose
(
fp
);
PyTokenizer_Free
(
tok
);
return
encoding
;
}
}
#endif
while
(
tok
->
lineno
<
2
&&
tok
->
done
==
E_OK
)
{
PyTokenizer_Get
(
tok
,
&
p_start
,
&
p_end
);
...
...
@@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
if
(
encoding
)
strcpy
(
encoding
,
tok
->
encoding
);
}
#ifndef PGEN
error:
#endif
PyTokenizer_Free
(
tok
);
return
encoding
;
}
/* Get the encoding of a Python file when no filename is available.

   Thin wrapper kept for existing callers: it forwards to
   PyTokenizer_FindEncodingFilename() with a NULL filename and hands
   back that function's result unchanged (NULL included).

   A non-NULL result is PyMem_MALLOC()'ed and must be freed by the caller. */
char *
PyTokenizer_FindEncoding(int fd)
{
    char *found_encoding = PyTokenizer_FindEncodingFilename(fd, NULL);

    return found_encoding;
}
#ifdef Py_DEBUG
void
...
...
Parser/tokenizer.h
View file @
fe7c5b5b
...
...
@@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
extern
int
PyTokenizer_Get
(
struct
tok_state
*
,
char
**
,
char
**
);
extern
char
*
PyTokenizer_RestoreEncoding
(
struct
tok_state
*
tok
,
int
len
,
int
*
offset
);
extern
char
*
PyTokenizer_FindEncoding
(
int
);
#ifdef __cplusplus
}
...
...
Python/import.c
View file @
fe7c5b5b
...
...
@@ -124,12 +124,12 @@ static const Py_UNICODE PYC_TAG_UNICODE[] = {
/* See _PyImport_FixupExtensionObject() below */
static
PyObject
*
extensions
=
NULL
;
/* Function from Parser/tokenizer.c */
extern
char
*
PyTokenizer_FindEncodingFilename
(
int
,
PyObject
*
);
/* This table is defined in config.c: */
extern
struct
_inittab
_PyImport_Inittab
[];
/* Method from Parser/tokenizer.c */
extern
char
*
PyTokenizer_FindEncoding
(
int
);
struct
_inittab
*
PyImport_Inittab
=
_PyImport_Inittab
;
/* these tables define the module suffixes that Python recognizes */
...
...
@@ -3540,9 +3540,9 @@ call_find_module(PyObject *name, PyObject *path_list)
}
if
(
fd
!=
-
1
)
{
if
(
strchr
(
fdp
->
mode
,
'b'
)
==
NULL
)
{
/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
/* PyTokenizer_FindEncoding
Filename
() returns PyMem_MALLOC'ed
memory. */
found_encoding
=
PyTokenizer_FindEncoding
(
fd
);
found_encoding
=
PyTokenizer_FindEncoding
Filename
(
fd
,
pathobj
);
lseek
(
fd
,
0
,
0
);
/* Reset position */
if
(
found_encoding
==
NULL
&&
PyErr_Occurred
())
{
Py_XDECREF
(
pathobj
);
...
...
Python/traceback.c
View file @
fe7c5b5b
...
...
@@ -18,8 +18,8 @@
#define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100
/*
Method
from Parser/tokenizer.c */
extern
char
*
PyTokenizer_FindEncoding
(
int
);
/*
Function
from Parser/tokenizer.c */
extern
char
*
PyTokenizer_FindEncoding
Filename
(
int
,
PyObject
*
);
static
PyObject
*
tb_dir
(
PyTracebackObject
*
self
)
...
...
@@ -251,7 +251,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
/* use the right encoding to decode the file as unicode */
fd
=
PyObject_AsFileDescriptor
(
binary
);
found_encoding
=
PyTokenizer_FindEncoding
(
fd
);
found_encoding
=
PyTokenizer_FindEncoding
Filename
(
fd
,
filename
);
encoding
=
(
found_encoding
!=
NULL
)
?
found_encoding
:
"utf-8"
;
lseek
(
fd
,
0
,
0
);
/* Reset position */
fob
=
PyObject_CallMethod
(
io
,
"TextIOWrapper"
,
"Os"
,
binary
,
encoding
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment