Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
58c0752a
Commit
58c0752a
authored
Nov 09, 2010
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #10335: Add tokenize.open(), detect the file encoding using
tokenize.detect_encoding() and open it in read only mode.
parent
ae4836df
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
54 additions
and
22 deletions
+54
-22
Doc/library/tokenize.rst
Doc/library/tokenize.rst
+9
-8
Lib/linecache.py
Lib/linecache.py
+1
-3
Lib/py_compile.py
Lib/py_compile.py
+1
-3
Lib/tabnanny.py
Lib/tabnanny.py
+1
-4
Lib/test/test_tokenize.py
Lib/test/test_tokenize.py
+22
-1
Lib/tokenize.py
Lib/tokenize.py
+15
-0
Lib/trace.py
Lib/trace.py
+2
-3
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/tokenize.rst
View file @
58c0752a
...
...
@@ -101,14 +101,16 @@ function it uses to do this is available:
If no encoding is specified, then the default of ``'utf-8'`` will be
returned.
:func:`detect_encoding` is useful for robustly reading Python source files.
A common pattern for this follows::
Use :func:`open` to open Python source files: it uses
:func:`detect_encoding` to detect the file encoding.
def read_python_source(file_name):
with open(file_name, "rb") as fp:
encoding = tokenize.detect_encoding(fp.readline)[0]
with open(file_name, "r", encoding=encoding) as fp:
return fp.read()
.. function:: open(filename)
Open a file in read only mode using the encoding detected by
:func:`detect_encoding`.
.. versionadded:: 3.2
Example of a script rewriter that transforms float literals into Decimal
...
...
@@ -153,4 +155,3 @@ objects::
result.append((toknum, tokval))
return untokenize(result).decode('utf-8')
Lib/linecache.py
View file @
58c0752a
...
...
@@ -123,9 +123,7 @@ def updatecache(filename, module_globals=None):
else
:
return
[]
try
:
with
open
(
fullname
,
'rb'
)
as
fp
:
coding
,
line
=
tokenize
.
detect_encoding
(
fp
.
readline
)
with
open
(
fullname
,
'r'
,
encoding
=
coding
)
as
fp
:
with
tokenize
.
open
(
fullname
)
as
fp
:
lines
=
fp
.
readlines
()
except
IOError
:
return
[]
...
...
Lib/py_compile.py
View file @
58c0752a
...
...
@@ -104,9 +104,7 @@ def compile(file, cfile=None, dfile=None, doraise=False):
byte-compile all installed files (or all files in selected
directories).
"""
with
open
(
file
,
"rb"
)
as
f
:
encoding
=
tokenize
.
detect_encoding
(
f
.
readline
)[
0
]
with
open
(
file
,
encoding
=
encoding
)
as
f
:
with
tokenize
.
open
(
file
)
as
f
:
try
:
timestamp
=
int
(
os
.
fstat
(
f
.
fileno
()).
st_mtime
)
except
AttributeError
:
...
...
Lib/tabnanny.py
View file @
58c0752a
...
...
@@ -93,11 +93,8 @@ def check(file):
check
(
fullname
)
return
with
open
(
file
,
'rb'
)
as
f
:
encoding
,
lines
=
tokenize
.
detect_encoding
(
f
.
readline
)
try
:
f
=
open
(
file
,
encoding
=
encoding
)
f
=
tokenize
.
open
(
file
)
except
IOError
as
msg
:
errprint
(
"%r: I/O Error: %s"
%
(
file
,
msg
))
return
...
...
Lib/test/test_tokenize.py
View file @
58c0752a
...
...
@@ -564,7 +564,8 @@ Non-ascii identifiers
from
test
import
support
from
tokenize
import
(
tokenize
,
_tokenize
,
untokenize
,
NUMBER
,
NAME
,
OP
,
STRING
,
ENDMARKER
,
tok_name
,
detect_encoding
)
STRING
,
ENDMARKER
,
tok_name
,
detect_encoding
,
open
as
tokenize_open
)
from
io
import
BytesIO
from
unittest
import
TestCase
import
os
,
sys
,
glob
...
...
@@ -857,6 +858,26 @@ class TestDetectEncoding(TestCase):
readline
=
self
.
get_readline
((
b'# coding: bad
\
n
'
,))
self
.
assertRaises
(
SyntaxError
,
detect_encoding
,
readline
)
def test_open(self):
    """Check that tokenize.open() detects the encoding and opens read-only.

    A temporary source file is written twice with an explicit coding
    cookie (iso-8859-15, then utf-8) and once with a UTF-8 BOM and no
    cookie.  After each write the file is reopened through
    tokenize.open() and the reported encoding and mode are checked.
    """
    filename = support.TESTFN + '.py'
    self.addCleanup(support.unlink, filename)

    # test coding cookie
    for encoding in ('iso-8859-15', 'utf-8'):
        with open(filename, 'w', encoding=encoding) as fp:
            print("# coding: %s" % encoding, file=fp)
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            # assertEqual instead of a bare assert: bare asserts vanish
            # under "python -O" and report no values on failure.
            self.assertEqual(fp.encoding, encoding)
            self.assertEqual(fp.mode, 'r')

    # test BOM (no coding cookie)
    with open(filename, 'w', encoding='utf-8-sig') as fp:
        print("print('euro:\u20ac')", file=fp)
    with tokenize_open(filename) as fp:
        self.assertEqual(fp.encoding, 'utf-8-sig')
        self.assertEqual(fp.mode, 'r')
class
TestTokenize
(
TestCase
):
def
test_tokenize
(
self
):
...
...
Lib/tokenize.py
View file @
58c0752a
...
...
@@ -29,6 +29,7 @@ import sys
from
token
import
*
from
codecs
import
lookup
,
BOM_UTF8
import
collections
from
io
import
TextIOWrapper
cookie_re
=
re
.
compile
(
"coding[:=]
\
s*([-
\
w.]+)"
)
import
token
...
...
@@ -335,6 +336,20 @@ def detect_encoding(readline):
return
default
,
[
first
,
second
]
# Keep a reference to the builtin: the function below shadows the name.
_builtin_open = open


def open(filename):
    """Open a file in read only mode using the encoding detected by
    detect_encoding().

    The file is first opened in binary mode so that detect_encoding() can
    inspect its first lines; it is then rewound and wrapped in a
    line-buffered TextIOWrapper using the detected encoding.  The returned
    wrapper has its ``mode`` attribute set to ``'r'``.

    Any exception raised while detecting the encoding or building the
    wrapper (for example SyntaxError from detect_encoding()) is propagated
    after the underlying binary file has been closed.
    """
    buffer = _builtin_open(filename, 'rb')
    try:
        encoding, lines = detect_encoding(buffer.readline)
        # detect_encoding() consumed the first line(s); rewind so the text
        # wrapper starts reading from the beginning of the file.
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
    except BaseException:
        # Do not leak the binary file object when setup fails.
        buffer.close()
        raise
def
tokenize
(
readline
):
"""
The tokenize() generator requires one argument, readline, which
...
...
Lib/trace.py
View file @
58c0752a
...
...
@@ -432,10 +432,9 @@ def find_strings(filename, encoding=None):
def
find_executable_linenos
(
filename
):
"""Return dict where keys are line numbers in the line number table."""
try
:
with
io
.
FileIO
(
filename
,
'r'
)
as
file
:
encoding
,
lines
=
tokenize
.
detect_encoding
(
file
.
readline
)
with
open
(
filename
,
"r"
,
encoding
=
encoding
)
as
f
:
with
tokenize
.
open
(
filename
)
as
f
:
prog
=
f
.
read
()
encoding
=
f
.
encoding
except
IOError
as
err
:
print
((
"Not printing coverage data for %r: %s"
%
(
filename
,
err
)),
file
=
sys
.
stderr
)
...
...
Misc/NEWS
View file @
58c0752a
...
...
@@ -60,6 +60,9 @@ Core and Builtins
Library
-------
- Issue #10335: Add tokenize.open(), detect the file encoding using
tokenize.detect_encoding() and open it in read only mode.
- Issue #10321: Added support for binary data to smtplib.SMTP.sendmail,
and a new method send_message to send an email.message.Message object.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment