Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
7b969843
Commit
7b969843
authored
Sep 23, 2010
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #1675951: Allow GzipFile to work with unseekable file objects.
Patch by Florian Festi.
parent
dda7fdf1
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
18 deletions
+101
-18
Doc/library/gzip.rst
Doc/library/gzip.rst
+3
-0
Lib/gzip.py
Lib/gzip.py
+73
-18
Lib/test/test_gzip.py
Lib/test/test_gzip.py
+21
-0
Misc/ACKS
Misc/ACKS
+1
-0
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Doc/library/gzip.rst
View file @
7b969843
...
@@ -74,6 +74,9 @@ The module defines the following items:
...
@@ -74,6 +74,9 @@ The module defines the following items:
.. versionchanged:: 3.2
.. versionchanged:: 3.2
Support for zero-padded files was added.
Support for zero-padded files was added.
.. versionchanged:: 3.2
Support for unseekable files was added.
.. function:: open(filename, mode='rb', compresslevel=9)
.. function:: open(filename, mode='rb', compresslevel=9)
...
...
Lib/gzip.py
View file @
7b969843
...
@@ -45,6 +45,62 @@ def open(filename, mode="rb", compresslevel=9):
...
@@ -45,6 +45,62 @@ def open(filename, mode="rb", compresslevel=9):
"""
"""
return
GzipFile
(
filename
,
mode
,
compresslevel
)
return
GzipFile
(
filename
,
mode
,
compresslevel
)
class
_PaddedFile
:
"""Minimal read-only file object that prepends a string to the contents
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
essential functionality."""
def
__init__
(
self
,
f
,
prepend
=
b''
):
self
.
_buffer
=
prepend
self
.
_length
=
len
(
prepend
)
self
.
file
=
f
self
.
_read
=
0
def
read
(
self
,
size
):
if
self
.
_read
is
None
:
return
self
.
file
.
read
(
size
)
if
self
.
_read
+
size
<=
self
.
_length
:
read
=
self
.
_read
self
.
_read
+=
size
return
self
.
_buffer
[
read
:
self
.
_read
]
else
:
read
=
self
.
_read
self
.
_read
=
None
return
self
.
_buffer
[
read
:]
+
\
self
.
file
.
read
(
size
-
self
.
_length
+
read
)
def
prepend
(
self
,
prepend
=
b''
,
readprevious
=
False
):
if
self
.
_read
is
None
:
self
.
_buffer
=
prepend
elif
readprevious
and
len
(
prepend
)
<=
self
.
_read
:
self
.
_read
-=
len
(
prepend
)
return
else
:
self
.
_buffer
=
self
.
_buffer
[
read
:]
+
prepend
self
.
_length
=
len
(
self
.
_buffer
)
self
.
_read
=
0
def
unused
(
self
):
if
self
.
_read
is
None
:
return
b''
return
self
.
_buffer
[
self
.
_read
:]
def
seek
(
self
,
offset
,
whence
=
0
):
# This is only ever called with offset=whence=0
if
whence
==
1
and
self
.
_read
is
not
None
:
if
0
<=
offset
+
self
.
_read
<=
self
.
_length
:
self
.
_read
+=
offset
return
else
:
offset
+=
self
.
_length
-
self
.
_read
self
.
_read
=
None
self
.
_buffer
=
None
return
self
.
file
.
seek
(
offset
,
whence
)
def
__getattr__
(
self
,
name
):
return
getattr
(
name
,
self
.
file
)
class
GzipFile
(
io
.
BufferedIOBase
):
class
GzipFile
(
io
.
BufferedIOBase
):
"""The GzipFile class simulates most of the methods of a file object with
"""The GzipFile class simulates most of the methods of a file object with
the exception of the readinto() and truncate() methods.
the exception of the readinto() and truncate() methods.
...
@@ -119,6 +175,7 @@ class GzipFile(io.BufferedIOBase):
...
@@ -119,6 +175,7 @@ class GzipFile(io.BufferedIOBase):
self
.
name
=
filename
self
.
name
=
filename
# Starts small, scales exponentially
# Starts small, scales exponentially
self
.
min_readsize
=
100
self
.
min_readsize
=
100
fileobj
=
_PaddedFile
(
fileobj
)
elif
mode
[
0
:
1
]
==
'w'
or
mode
[
0
:
1
]
==
'a'
:
elif
mode
[
0
:
1
]
==
'w'
or
mode
[
0
:
1
]
==
'a'
:
self
.
mode
=
WRITE
self
.
mode
=
WRITE
...
@@ -188,6 +245,9 @@ class GzipFile(io.BufferedIOBase):
...
@@ -188,6 +245,9 @@ class GzipFile(io.BufferedIOBase):
def
_read_gzip_header
(
self
):
def
_read_gzip_header
(
self
):
magic
=
self
.
fileobj
.
read
(
2
)
magic
=
self
.
fileobj
.
read
(
2
)
if
magic
==
b''
:
raise
EOFError
(
"Reached EOF"
)
if
magic
!=
b'
\
037
\
213
'
:
if
magic
!=
b'
\
037
\
213
'
:
raise
IOError
(
'Not a gzipped file'
)
raise
IOError
(
'Not a gzipped file'
)
method
=
ord
(
self
.
fileobj
.
read
(
1
)
)
method
=
ord
(
self
.
fileobj
.
read
(
1
)
)
...
@@ -219,6 +279,11 @@ class GzipFile(io.BufferedIOBase):
...
@@ -219,6 +279,11 @@ class GzipFile(io.BufferedIOBase):
if
flag
&
FHCRC
:
if
flag
&
FHCRC
:
self
.
fileobj
.
read
(
2
)
# Read & discard the 16-bit header CRC
self
.
fileobj
.
read
(
2
)
# Read & discard the 16-bit header CRC
unused
=
self
.
fileobj
.
unused
()
if
unused
:
uncompress
=
self
.
decompress
.
decompress
(
unused
)
self
.
_add_read_data
(
uncompress
)
def
write
(
self
,
data
):
def
write
(
self
,
data
):
if
self
.
mode
!=
WRITE
:
if
self
.
mode
!=
WRITE
:
import
errno
import
errno
...
@@ -282,16 +347,6 @@ class GzipFile(io.BufferedIOBase):
...
@@ -282,16 +347,6 @@ class GzipFile(io.BufferedIOBase):
if
self
.
_new_member
:
if
self
.
_new_member
:
# If the _new_member flag is set, we have to
# If the _new_member flag is set, we have to
# jump to the next member, if there is one.
# jump to the next member, if there is one.
#
# First, check if we're at the end of the file;
# if so, it's time to stop; no more members to read.
pos
=
self
.
fileobj
.
tell
()
# Save current position
self
.
fileobj
.
seek
(
0
,
2
)
# Seek to end of file
if
pos
==
self
.
fileobj
.
tell
():
raise
EOFError
(
"Reached EOF"
)
else
:
self
.
fileobj
.
seek
(
pos
)
# Return to original position
self
.
_init_read
()
self
.
_init_read
()
self
.
_read_gzip_header
()
self
.
_read_gzip_header
()
self
.
decompress
=
zlib
.
decompressobj
(
-
zlib
.
MAX_WBITS
)
self
.
decompress
=
zlib
.
decompressobj
(
-
zlib
.
MAX_WBITS
)
...
@@ -305,6 +360,9 @@ class GzipFile(io.BufferedIOBase):
...
@@ -305,6 +360,9 @@ class GzipFile(io.BufferedIOBase):
if
buf
==
b""
:
if
buf
==
b""
:
uncompress
=
self
.
decompress
.
flush
()
uncompress
=
self
.
decompress
.
flush
()
# Prepend the already read bytes to the fileobj so they can be
# seen by _read_eof()
self
.
fileobj
.
prepend
(
self
.
decompress
.
unused_data
,
True
)
self
.
_read_eof
()
self
.
_read_eof
()
self
.
_add_read_data
(
uncompress
)
self
.
_add_read_data
(
uncompress
)
raise
EOFError
(
'Reached EOF'
)
raise
EOFError
(
'Reached EOF'
)
...
@@ -316,10 +374,9 @@ class GzipFile(io.BufferedIOBase):
...
@@ -316,10 +374,9 @@ class GzipFile(io.BufferedIOBase):
# Ending case: we've come to the end of a member in the file,
# Ending case: we've come to the end of a member in the file,
# so seek back to the start of the unused data, finish up
# so seek back to the start of the unused data, finish up
# this member, and read a new gzip header.
# this member, and read a new gzip header.
# (The number of bytes to seek back is the length of the unused
# Prepend the already read bytes to the fileobj so they can be
# data, minus 8 because _read_eof() will rewind a further 8 bytes)
# seen by _read_eof() and _read_gzip_header()
self
.
fileobj
.
seek
(
-
len
(
self
.
decompress
.
unused_data
)
+
8
,
1
)
self
.
fileobj
.
prepend
(
self
.
decompress
.
unused_data
,
True
)
# Check the CRC and file size, and set the flag so we read
# Check the CRC and file size, and set the flag so we read
# a new member on the next call
# a new member on the next call
self
.
_read_eof
()
self
.
_read_eof
()
...
@@ -334,12 +391,10 @@ class GzipFile(io.BufferedIOBase):
...
@@ -334,12 +391,10 @@ class GzipFile(io.BufferedIOBase):
self
.
size
=
self
.
size
+
len
(
data
)
self
.
size
=
self
.
size
+
len
(
data
)
def
_read_eof
(
self
):
def
_read_eof
(
self
):
# We've read to the end of the file, so we have to rewind in order
# We've read to the end of the file
# to reread the 8 bytes containing the CRC and the file size.
# We check that the computed CRC and size of the
# We check that the computed CRC and size of the
# uncompressed data matches the stored values. Note that the size
# uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32.
# stored is the true file size mod 2**32.
self
.
fileobj
.
seek
(
-
8
,
1
)
crc32
=
read32
(
self
.
fileobj
)
crc32
=
read32
(
self
.
fileobj
)
isize
=
read32
(
self
.
fileobj
)
# may exceed 2GB
isize
=
read32
(
self
.
fileobj
)
# may exceed 2GB
if
crc32
!=
self
.
crc
:
if
crc32
!=
self
.
crc
:
...
@@ -355,7 +410,7 @@ class GzipFile(io.BufferedIOBase):
...
@@ -355,7 +410,7 @@ class GzipFile(io.BufferedIOBase):
while
c
==
b"
\
x00
"
:
while
c
==
b"
\
x00
"
:
c
=
self
.
fileobj
.
read
(
1
)
c
=
self
.
fileobj
.
read
(
1
)
if
c
:
if
c
:
self
.
fileobj
.
seek
(
-
1
,
1
)
self
.
fileobj
.
prepend
(
c
,
True
)
@
property
@
property
def
closed
(
self
):
def
closed
(
self
):
...
...
Lib/test/test_gzip.py
View file @
7b969843
...
@@ -22,6 +22,17 @@ data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
...
@@ -22,6 +22,17 @@ data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */
"""
"""
class UnseekableIO(io.BytesIO):
    """In-memory byte stream that refuses every positioning operation.

    Reading and writing behave exactly like io.BytesIO; only seekable(),
    tell() and seek() are overridden.
    """

    def seekable(self):
        # Advertise the stream as non-seekable.
        return False

    def tell(self):
        # Reporting a position is not supported.
        raise io.UnsupportedOperation()

    def seek(self, *args):
        # Reject any seek request, whatever its arguments.
        raise io.UnsupportedOperation()
class
TestGzip
(
unittest
.
TestCase
):
class
TestGzip
(
unittest
.
TestCase
):
filename
=
support
.
TESTFN
filename
=
support
.
TESTFN
...
@@ -265,6 +276,16 @@ class TestGzip(unittest.TestCase):
...
@@ -265,6 +276,16 @@ class TestGzip(unittest.TestCase):
d
=
f
.
read
()
d
=
f
.
read
()
self
.
assertEqual
(
d
,
data1
*
50
,
"Incorrect data in file"
)
self
.
assertEqual
(
d
,
data1
*
50
,
"Incorrect data in file"
)
def test_non_seekable_file(self):
    # Round-trip data through GzipFile using streams that cannot seek or
    # tell, first for writing and then for reading.
    payload = data1 * 50

    sink = UnseekableIO()
    with gzip.GzipFile(fileobj=sink, mode="wb") as writer:
        writer.write(payload)

    # Feed the compressed bytes back through a fresh unseekable stream.
    source = UnseekableIO(sink.getvalue())
    with gzip.GzipFile(fileobj=source, mode="rb") as reader:
        roundtripped = reader.read()
        self.assertEqual(roundtripped, payload)
# Testing compress/decompress shortcut functions
# Testing compress/decompress shortcut functions
def
test_compress
(
self
):
def
test_compress
(
self
):
...
...
Misc/ACKS
View file @
7b969843
...
@@ -260,6 +260,7 @@ Bill Fancher
...
@@ -260,6 +260,7 @@ Bill Fancher
Mark Favas
Mark Favas
Niels Ferguson
Niels Ferguson
Sebastian Fernandez
Sebastian Fernandez
Florian Festi
Vincent Fiack
Vincent Fiack
Tomer Filiba
Tomer Filiba
Jeffrey Finkelstein
Jeffrey Finkelstein
...
...
Misc/NEWS
View file @
7b969843
...
@@ -62,6 +62,9 @@ Core and Builtins
...
@@ -62,6 +62,9 @@ Core and Builtins
Library
Library
-------
-------
- Issue #1675951: Allow GzipFile to work with unseekable file objects.
Patch by Florian Festi.
- Logging: Added QueueListener class to facilitate logging usage for
- Logging: Added QueueListener class to facilitate logging usage for
performance-critical threads.
performance-critical threads.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment