Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
cc2dbc58
Commit
cc2dbc58
authored
Mar 08, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #15068: Got rid of excessive buffering in the fileinput module.
The bufsize parameter is no longer used.
parent
55e3218e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
152 additions
and
90 deletions
+152
-90
Doc/library/fileinput.rst
Doc/library/fileinput.rst
+7
-1
Lib/fileinput.py
Lib/fileinput.py
+73
-88
Lib/test/test_fileinput.py
Lib/test/test_fileinput.py
+69
-1
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/fileinput.rst
View file @
cc2dbc58
...
...
@@ -71,6 +71,9 @@ The following function is the primary interface of this module:
.. versionchanged:: 3.2
Can be used as a context manager.
.. versionchanged:: 3.5.2
The *bufsize* parameter is no longer used.
The following functions use the global state created by :func:`fileinput.input`;
if there is no active state, :exc:`RuntimeError` is raised.
...
...
@@ -161,7 +164,10 @@ available for subclassing as well:
Can be used as a context manager.
.. deprecated:: 3.4
The ``'rU'`` and ``'U'`` modes.
The ``'rU'`` and ``'U'`` modes.
.. versionchanged:: 3.5.2
The *bufsize* parameter is no longer used.
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is
...
...
Lib/fileinput.py
View file @
cc2dbc58
...
...
@@ -64,13 +64,6 @@ deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.
Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines. Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline(). A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.
XXX Possible additions:
- optional getopt argument processing
...
...
@@ -86,6 +79,7 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
_state
=
None
# No longer used
DEFAULT_BUFSIZE
=
8
*
1024
def
input
(
files
=
None
,
inplace
=
False
,
backup
=
""
,
bufsize
=
0
,
...
...
@@ -207,17 +201,15 @@ class FileInput:
self
.
_files
=
files
self
.
_inplace
=
inplace
self
.
_backup
=
backup
self
.
_bufsize
=
bufsize
or
DEFAULT_BUFSIZE
self
.
_savestdout
=
None
self
.
_output
=
None
self
.
_filename
=
None
self
.
_lineno
=
0
self
.
_
start
lineno
=
0
self
.
_filelineno
=
0
self
.
_file
=
None
self
.
_readline
=
self
.
_start_readline
self
.
_isstdin
=
False
self
.
_backupfilename
=
None
self
.
_buffer
=
[]
self
.
_bufindex
=
0
# restrict mode argument to reading modes
if
mode
not
in
(
'r'
,
'rU'
,
'U'
,
'rb'
):
raise
ValueError
(
"FileInput opening mode must be one of "
...
...
@@ -253,22 +245,18 @@ class FileInput:
return
self
def
__next__
(
self
):
try
:
line
=
self
.
_buffer
[
self
.
_bufindex
]
except
IndexError
:
pass
else
:
self
.
_bufindex
+=
1
self
.
_lineno
+=
1
line
=
self
.
_readline
()
if
line
:
self
.
_filelineno
+=
1
return
line
line
=
self
.
readline
()
if
not
line
:
if
not
self
.
_file
:
raise
StopIteration
return
line
self
.
nextfile
()
# Recursive call
return
self
.
__next__
()
def
__getitem__
(
self
,
i
):
if
i
!=
self
.
_lineno
:
if
i
!=
self
.
lineno
()
:
raise
RuntimeError
(
"accessing lines out of order"
)
try
:
return
self
.
__next__
()
...
...
@@ -289,6 +277,7 @@ class FileInput:
finally
:
file
=
self
.
_file
self
.
_file
=
None
self
.
_readline
=
self
.
_start_readline
try
:
if
file
and
not
self
.
_isstdin
:
file
.
close
()
...
...
@@ -300,85 +289,81 @@ class FileInput:
except
OSError
:
pass
self
.
_isstdin
=
False
self
.
_buffer
=
[]
self
.
_bufindex
=
0
def
readline
(
self
):
try
:
line
=
self
.
_buffer
[
self
.
_bufindex
]
except
IndexError
:
pass
while
True
:
line
=
self
.
_readline
()
if
line
:
self
.
_filelineno
+=
1
return
line
if
not
self
.
_file
:
return
line
self
.
nextfile
()
# repeat with next file
def
_start_readline
(
self
):
if
not
self
.
_files
:
if
'b'
in
self
.
_mode
:
return
b''
else
:
return
''
self
.
_filename
=
self
.
_files
[
0
]
self
.
_files
=
self
.
_files
[
1
:]
self
.
_startlineno
=
self
.
lineno
()
self
.
_filelineno
=
0
self
.
_file
=
None
self
.
_isstdin
=
False
self
.
_backupfilename
=
0
if
self
.
_filename
==
'-'
:
self
.
_filename
=
'<stdin>'
if
'b'
in
self
.
_mode
:
self
.
_file
=
getattr
(
sys
.
stdin
,
'buffer'
,
sys
.
stdin
)
else
:
self
.
_file
=
sys
.
stdin
self
.
_isstdin
=
True
else
:
self
.
_bufindex
+=
1
self
.
_lineno
+=
1
self
.
_filelineno
+=
1
return
line
if
not
self
.
_file
:
if
not
self
.
_files
:
if
'b'
in
self
.
_mode
:
return
b''
if
self
.
_inplace
:
self
.
_backupfilename
=
(
self
.
_filename
+
(
self
.
_backup
or
".bak"
))
try
:
os
.
unlink
(
self
.
_backupfilename
)
except
OSError
:
pass
# The next few lines may raise OSError
os
.
rename
(
self
.
_filename
,
self
.
_backupfilename
)
self
.
_file
=
open
(
self
.
_backupfilename
,
self
.
_mode
)
try
:
perm
=
os
.
fstat
(
self
.
_file
.
fileno
()).
st_mode
except
OSError
:
self
.
_output
=
open
(
self
.
_filename
,
"w"
)
else
:
return
''
self
.
_filename
=
self
.
_files
[
0
]
self
.
_files
=
self
.
_files
[
1
:]
self
.
_filelineno
=
0
self
.
_file
=
None
self
.
_isstdin
=
False
self
.
_backupfilename
=
0
if
self
.
_filename
==
'-'
:
self
.
_filename
=
'<stdin>'
if
'b'
in
self
.
_mode
:
self
.
_file
=
getattr
(
sys
.
stdin
,
'buffer'
,
sys
.
stdin
)
else
:
self
.
_file
=
sys
.
stdin
self
.
_isstdin
=
True
else
:
if
self
.
_inplace
:
self
.
_backupfilename
=
(
self
.
_filename
+
(
self
.
_backup
or
".bak"
))
mode
=
os
.
O_CREAT
|
os
.
O_WRONLY
|
os
.
O_TRUNC
if
hasattr
(
os
,
'O_BINARY'
):
mode
|=
os
.
O_BINARY
fd
=
os
.
open
(
self
.
_filename
,
mode
,
perm
)
self
.
_output
=
os
.
fdopen
(
fd
,
"w"
)
try
:
os
.
unlink
(
self
.
_backupfilename
)
if
hasattr
(
os
,
'chmod'
):
os
.
chmod
(
self
.
_filename
,
perm
)
except
OSError
:
pass
# The next few lines may raise OSError
os
.
rename
(
self
.
_filename
,
self
.
_backupfilename
)
self
.
_file
=
open
(
self
.
_backupfilename
,
self
.
_mode
)
try
:
perm
=
os
.
fstat
(
self
.
_file
.
fileno
()).
st_mode
except
OSError
:
self
.
_output
=
open
(
self
.
_filename
,
"w"
)
else
:
mode
=
os
.
O_CREAT
|
os
.
O_WRONLY
|
os
.
O_TRUNC
if
hasattr
(
os
,
'O_BINARY'
):
mode
|=
os
.
O_BINARY
fd
=
os
.
open
(
self
.
_filename
,
mode
,
perm
)
self
.
_output
=
os
.
fdopen
(
fd
,
"w"
)
try
:
if
hasattr
(
os
,
'chmod'
):
os
.
chmod
(
self
.
_filename
,
perm
)
except
OSError
:
pass
self
.
_savestdout
=
sys
.
stdout
sys
.
stdout
=
self
.
_output
self
.
_savestdout
=
sys
.
stdout
sys
.
stdout
=
self
.
_output
else
:
# This may raise OSError
if
self
.
_openhook
:
self
.
_file
=
self
.
_openhook
(
self
.
_filename
,
self
.
_mode
)
else
:
# This may raise OSError
if
self
.
_openhook
:
self
.
_file
=
self
.
_openhook
(
self
.
_filename
,
self
.
_mode
)
else
:
self
.
_file
=
open
(
self
.
_filename
,
self
.
_mode
)
self
.
_buffer
=
self
.
_file
.
readlines
(
self
.
_bufsize
)
self
.
_bufindex
=
0
if
not
self
.
_buffer
:
self
.
nextfile
()
# Recursive call
return
self
.
readline
()
self
.
_file
=
open
(
self
.
_filename
,
self
.
_mode
)
self
.
_readline
=
self
.
_file
.
readline
return
self
.
_readline
()
def
filename
(
self
):
return
self
.
_filename
def
lineno
(
self
):
return
self
.
_lineno
return
self
.
_
startlineno
+
self
.
_file
lineno
def
filelineno
(
self
):
return
self
.
_filelineno
...
...
Lib/test/test_fileinput.py
View file @
cc2dbc58
...
...
@@ -46,6 +46,42 @@ def remove_tempfiles(*names):
if
name
:
safe_unlink
(
name
)
class
LineReader
:
def
__init__
(
self
):
self
.
_linesread
=
[]
@
property
def
linesread
(
self
):
try
:
return
self
.
_linesread
[:]
finally
:
self
.
_linesread
=
[]
def
openhook
(
self
,
filename
,
mode
):
self
.
it
=
iter
(
filename
.
splitlines
(
True
))
return
self
def
readline
(
self
,
size
=
None
):
line
=
next
(
self
.
it
,
''
)
self
.
_linesread
.
append
(
line
)
return
line
def
readlines
(
self
,
hint
=-
1
):
lines
=
[]
size
=
0
while
True
:
line
=
self
.
readline
()
if
not
line
:
return
lines
lines
.
append
(
line
)
size
+=
len
(
line
)
if
size
>=
hint
:
return
lines
def
close
(
self
):
pass
class
BufferSizesTests
(
unittest
.
TestCase
):
def
test_buffer_sizes
(
self
):
# First, run the tests with default and teeny buffer size.
...
...
@@ -289,7 +325,7 @@ class FileInputTests(unittest.TestCase):
self
.
addCleanup
(
safe_unlink
,
TESTFN
)
with
FileInput
(
files
=
TESTFN
,
openhook
=
hook_encoded
(
'ascii'
)
,
bufsize
=
8
)
as
fi
:
openhook
=
hook_encoded
(
'ascii'
))
as
fi
:
try
:
self
.
assertEqual
(
fi
.
readline
(),
'A
\
n
'
)
self
.
assertEqual
(
fi
.
readline
(),
'B
\
n
'
)
...
...
@@ -457,6 +493,38 @@ class FileInputTests(unittest.TestCase):
self
.
assertEqual
(
result
,
-
1
,
"fileno() should return -1"
)
def
test_readline_buffering
(
self
):
src
=
LineReader
()
with
FileInput
(
files
=
[
'line1
\
n
line2'
,
'line3
\
n
'
],
openhook
=
src
.
openhook
)
as
fi
:
self
.
assertEqual
(
src
.
linesread
,
[])
self
.
assertEqual
(
fi
.
readline
(),
'line1
\
n
'
)
self
.
assertEqual
(
src
.
linesread
,
[
'line1
\
n
'
])
self
.
assertEqual
(
fi
.
readline
(),
'line2'
)
self
.
assertEqual
(
src
.
linesread
,
[
'line2'
])
self
.
assertEqual
(
fi
.
readline
(),
'line3
\
n
'
)
self
.
assertEqual
(
src
.
linesread
,
[
''
,
'line3
\
n
'
])
self
.
assertEqual
(
fi
.
readline
(),
''
)
self
.
assertEqual
(
src
.
linesread
,
[
''
])
self
.
assertEqual
(
fi
.
readline
(),
''
)
self
.
assertEqual
(
src
.
linesread
,
[])
def
test_iteration_buffering
(
self
):
src
=
LineReader
()
with
FileInput
(
files
=
[
'line1
\
n
line2'
,
'line3
\
n
'
],
openhook
=
src
.
openhook
)
as
fi
:
self
.
assertEqual
(
src
.
linesread
,
[])
self
.
assertEqual
(
next
(
fi
),
'line1
\
n
'
)
self
.
assertEqual
(
src
.
linesread
,
[
'line1
\
n
'
])
self
.
assertEqual
(
next
(
fi
),
'line2'
)
self
.
assertEqual
(
src
.
linesread
,
[
'line2'
])
self
.
assertEqual
(
next
(
fi
),
'line3
\
n
'
)
self
.
assertEqual
(
src
.
linesread
,
[
''
,
'line3
\
n
'
])
self
.
assertRaises
(
StopIteration
,
next
,
fi
)
self
.
assertEqual
(
src
.
linesread
,
[
''
])
self
.
assertRaises
(
StopIteration
,
next
,
fi
)
self
.
assertEqual
(
src
.
linesread
,
[])
class
MockFileInput
:
"""A class that mocks out fileinput.FileInput for use during unit tests"""
...
...
Misc/NEWS
View file @
cc2dbc58
...
...
@@ -91,6 +91,9 @@ Core and Builtins
Library
-------
- Issue #15068: Got rid of excessive buffering in the fileinput module.
The bufsize parameter is no longer used.
- Issue #2202: Fix UnboundLocalError in
AbstractDigestAuthHandler.get_algorithm_impls. Initial patch by Mathieu Dupuy.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment