Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
93b061bc
Commit
93b061bc
authored
May 12, 2013
by
Georg Brandl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #1159051: Back out a fix for handling corrupted gzip files that
broke backwards compatibility.
parent
a9217a42
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
41 additions
and
68 deletions
+41
-68
Lib/gzip.py
Lib/gzip.py
+38
-35
Lib/test/test_bz2.py
Lib/test/test_bz2.py
+0
-18
Lib/test/test_gzip.py
Lib/test/test_gzip.py
+0
-13
Misc/NEWS
Misc/NEWS
+3
-2
No files found.
Lib/gzip.py
View file @
93b061bc
...
...
@@ -33,6 +33,9 @@ def write32u(output, value):
# or unsigned.
output
.
write
(
struct
.
pack
(
"<L"
,
value
))
def
read32
(
input
):
return
struct
.
unpack
(
"<I"
,
input
.
read
(
4
))[
0
]
def
open
(
filename
,
mode
=
"rb"
,
compresslevel
=
9
):
"""Shorthand for GzipFile(filename, mode, compresslevel).
...
...
@@ -256,32 +259,27 @@ class GzipFile(io.BufferedIOBase):
self
.
crc
=
zlib
.
crc32
(
b""
)
&
0xffffffff
self
.
size
=
0
def
_read_exact
(
self
,
n
):
data
=
self
.
fileobj
.
read
(
n
)
while
len
(
data
)
<
n
:
b
=
self
.
fileobj
.
read
(
n
-
len
(
data
))
if
not
b
:
raise
EOFError
(
"Compressed file ended before the "
"end-of-stream marker was reached"
)
data
+=
b
return
data
def
_read_gzip_header
(
self
):
magic
=
self
.
fileobj
.
read
(
2
)
if
magic
==
b''
:
r
eturn
False
r
aise
EOFError
(
"Reached EOF"
)
if
magic
!=
b'
\
037
\
213
'
:
raise
IOError
(
'Not a gzipped file'
)
method
,
flag
,
self
.
mtime
=
struct
.
unpack
(
"<BBIxx"
,
self
.
_read_exact
(
8
))
method
=
ord
(
self
.
fileobj
.
read
(
1
)
)
if
method
!=
8
:
raise
IOError
(
'Unknown compression method'
)
flag
=
ord
(
self
.
fileobj
.
read
(
1
)
)
self
.
mtime
=
read32
(
self
.
fileobj
)
# extraflag = self.fileobj.read(1)
# os = self.fileobj.read(1)
self
.
fileobj
.
read
(
2
)
if
flag
&
FEXTRA
:
# Read & discard the extra field, if present
extra_len
,
=
struct
.
unpack
(
"<H"
,
self
.
_read_exact
(
2
))
self
.
_read_exact
(
extra_len
)
xlen
=
ord
(
self
.
fileobj
.
read
(
1
))
xlen
=
xlen
+
256
*
ord
(
self
.
fileobj
.
read
(
1
))
self
.
fileobj
.
read
(
xlen
)
if
flag
&
FNAME
:
# Read and discard a null-terminated string containing the filename
while
True
:
...
...
@@ -295,13 +293,12 @@ class GzipFile(io.BufferedIOBase):
if
not
s
or
s
==
b'
\
000
'
:
break
if
flag
&
FHCRC
:
self
.
_read_exact
(
2
)
# Read & discard the 16-bit header CRC
self
.
fileobj
.
read
(
2
)
# Read & discard the 16-bit header CRC
unused
=
self
.
fileobj
.
unused
()
if
unused
:
uncompress
=
self
.
decompress
.
decompress
(
unused
)
self
.
_add_read_data
(
uncompress
)
return
True
def
write
(
self
,
data
):
self
.
_check_closed
()
...
...
@@ -335,16 +332,20 @@ class GzipFile(io.BufferedIOBase):
readsize
=
1024
if
size
<
0
:
# get the whole thing
while
self
.
_read
(
readsize
):
readsize
=
min
(
self
.
max_read_chunk
,
readsize
*
2
)
size
=
self
.
extrasize
try
:
while
True
:
self
.
_read
(
readsize
)
readsize
=
min
(
self
.
max_read_chunk
,
readsize
*
2
)
except
EOFError
:
size
=
self
.
extrasize
else
:
# just get some more of it
while
size
>
self
.
extrasize
:
if
not
self
.
_read
(
readsize
):
if
size
>
self
.
extrasize
:
size
=
self
.
extrasize
break
readsize
=
min
(
self
.
max_read_chunk
,
readsize
*
2
)
try
:
while
size
>
self
.
extrasize
:
self
.
_read
(
readsize
)
readsize
=
min
(
self
.
max_read_chunk
,
readsize
*
2
)
except
EOFError
:
if
size
>
self
.
extrasize
:
size
=
self
.
extrasize
offset
=
self
.
offset
-
self
.
extrastart
chunk
=
self
.
extrabuf
[
offset
:
offset
+
size
]
...
...
@@ -365,9 +366,12 @@ class GzipFile(io.BufferedIOBase):
if
self
.
extrasize
==
0
:
if
self
.
fileobj
is
None
:
return
b''
# Ensure that we don't return b"" if we haven't reached EOF.
# 1024 is the same buffering heuristic used in read()
while
self
.
extrasize
==
0
and
self
.
_read
(
max
(
n
,
1024
)):
try
:
# Ensure that we don't return b"" if we haven't reached EOF.
while
self
.
extrasize
==
0
:
# 1024 is the same buffering heuristic used in read()
self
.
_read
(
max
(
n
,
1024
))
except
EOFError
:
pass
offset
=
self
.
offset
-
self
.
extrastart
remaining
=
self
.
extrasize
...
...
@@ -380,14 +384,13 @@ class GzipFile(io.BufferedIOBase):
def
_read
(
self
,
size
=
1024
):
if
self
.
fileobj
is
None
:
r
eturn
False
r
aise
EOFError
(
"Reached EOF"
)
if
self
.
_new_member
:
# If the _new_member flag is set, we have to
# jump to the next member, if there is one.
self
.
_init_read
()
if
not
self
.
_read_gzip_header
():
return
False
self
.
_read_gzip_header
()
self
.
decompress
=
zlib
.
decompressobj
(
-
zlib
.
MAX_WBITS
)
self
.
_new_member
=
False
...
...
@@ -404,7 +407,7 @@ class GzipFile(io.BufferedIOBase):
self
.
fileobj
.
prepend
(
self
.
decompress
.
unused_data
,
True
)
self
.
_read_eof
()
self
.
_add_read_data
(
uncompress
)
r
eturn
False
r
aise
EOFError
(
'Reached EOF'
)
uncompress
=
self
.
decompress
.
decompress
(
buf
)
self
.
_add_read_data
(
uncompress
)
...
...
@@ -420,7 +423,6 @@ class GzipFile(io.BufferedIOBase):
# a new member on the next call
self
.
_read_eof
()
self
.
_new_member
=
True
return
True
def
_add_read_data
(
self
,
data
):
self
.
crc
=
zlib
.
crc32
(
data
,
self
.
crc
)
&
0xffffffff
...
...
@@ -435,7 +437,8 @@ class GzipFile(io.BufferedIOBase):
# We check the that the computed CRC and size of the
# uncompressed data matches the stored values. Note that the size
# stored is the true file size mod 2**32.
crc32
,
isize
=
struct
.
unpack
(
"<II"
,
self
.
_read_exact
(
8
))
crc32
=
read32
(
self
.
fileobj
)
isize
=
read32
(
self
.
fileobj
)
# may exceed 2GB
if
crc32
!=
self
.
crc
:
raise
IOError
(
"CRC check failed %s != %s"
%
(
hex
(
crc32
),
hex
(
self
.
crc
)))
...
...
Lib/test/test_bz2.py
View file @
93b061bc
...
...
@@ -292,24 +292,6 @@ class BZ2FileTest(BaseTest):
self
.
assertRaises
(
ValueError
,
f
.
readline
)
self
.
assertRaises
(
ValueError
,
f
.
readlines
)
def
test_read_truncated
(
self
):
# Drop the eos_magic field (6 bytes) and CRC (4 bytes).
truncated
=
self
.
DATA
[:
-
10
]
with
open
(
self
.
filename
,
'wb'
)
as
f
:
f
.
write
(
truncated
)
with
BZ2File
(
self
.
filename
)
as
f
:
self
.
assertRaises
(
EOFError
,
f
.
read
)
with
BZ2File
(
self
.
filename
)
as
f
:
self
.
assertEqual
(
f
.
read
(
len
(
self
.
TEXT
)),
self
.
TEXT
)
self
.
assertRaises
(
EOFError
,
f
.
read
,
1
)
# Incomplete 4-byte file header, and block header of at least 146 bits.
for
i
in
range
(
22
):
with
open
(
self
.
filename
,
'wb'
)
as
f
:
f
.
write
(
truncated
[:
i
])
with
BZ2File
(
self
.
filename
)
as
f
:
self
.
assertRaises
(
EOFError
,
f
.
read
,
1
)
class
BZ2CompressorTest
(
BaseTest
):
def
testCompress
(
self
):
# "Test BZ2Compressor.compress()/flush()"
...
...
Lib/test/test_gzip.py
100644 → 100755
View file @
93b061bc
...
...
@@ -365,19 +365,6 @@ class TestGzip(unittest.TestCase):
datac
=
gzip
.
compress
(
data
)
self
.
assertEqual
(
gzip
.
decompress
(
datac
),
data
)
def
test_read_truncated
(
self
):
data
=
data1
*
50
# Drop the CRC (4 bytes) and file size (4 bytes).
truncated
=
gzip
.
compress
(
data
)[:
-
8
]
with
gzip
.
GzipFile
(
fileobj
=
io
.
BytesIO
(
truncated
))
as
f
:
self
.
assertRaises
(
EOFError
,
f
.
read
)
with
gzip
.
GzipFile
(
fileobj
=
io
.
BytesIO
(
truncated
))
as
f
:
self
.
assertEqual
(
f
.
read
(
len
(
data
)),
data
)
self
.
assertRaises
(
EOFError
,
f
.
read
,
1
)
# Incomplete 10-byte header.
for
i
in
range
(
2
,
10
):
with
gzip
.
GzipFile
(
fileobj
=
io
.
BytesIO
(
truncated
[:
i
]))
as
f
:
self
.
assertRaises
(
EOFError
,
f
.
read
,
1
)
def
test_read_with_extra
(
self
):
# Gzip data with an extra field
...
...
Misc/NEWS
View file @
93b061bc
...
...
@@ -14,11 +14,12 @@ Library
which were omitted in 3.2.4 when updating the bundled version of
libffi used by ctypes.
- Issue #17666: Fix reading gzip files with an extra field.
- Issue #15535: Fix namedtuple pickles which were picking up the OrderedDict
instead of just the underlying tuple.
- Issue #1159051: Back out a fix for handling corrupted gzip files that
broke backwards compatibility.
Build
-----
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment