Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
55b43388
Commit
55b43388
authored
May 27, 2011
by
Nadeem Vawda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
Initial patch by Nir Aides.
parent
c556e10b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
161 additions
and
15 deletions
+161
-15
Lib/bz2.py
Lib/bz2.py
+34
-13
Lib/test/test_bz2.py
Lib/test/test_bz2.py
+124
-2
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/bz2.py
View file @
55b43388
...
...
@@ -76,6 +76,10 @@ class BZ2File(io.BufferedIOBase):
mode
=
"wb"
mode_code
=
_MODE_WRITE
self
.
_compressor
=
BZ2Compressor
()
elif
mode
in
(
"a"
,
"ab"
):
mode
=
"ab"
mode_code
=
_MODE_WRITE
self
.
_compressor
=
BZ2Compressor
()
else
:
raise
ValueError
(
"Invalid mode: {!r}"
.
format
(
mode
))
...
...
@@ -161,14 +165,25 @@ class BZ2File(io.BufferedIOBase):
def
_fill_buffer
(
self
):
if
self
.
_buffer
:
return
True
if
self
.
_decompressor
.
eof
:
self
.
_mode
=
_MODE_READ_EOF
self
.
_size
=
self
.
_pos
return
False
rawblock
=
self
.
_fp
.
read
(
_BUFFER_SIZE
)
if
self
.
_decompressor
.
unused_data
:
rawblock
=
self
.
_decompressor
.
unused_data
else
:
rawblock
=
self
.
_fp
.
read
(
_BUFFER_SIZE
)
if
not
rawblock
:
raise
EOFError
(
"Compressed file ended before the "
"end-of-stream marker was reached"
)
if
self
.
_decompressor
.
eof
:
self
.
_mode
=
_MODE_READ_EOF
self
.
_size
=
self
.
_pos
return
False
else
:
raise
EOFError
(
"Compressed file ended before the "
"end-of-stream marker was reached"
)
# Continue to next stream.
if
self
.
_decompressor
.
eof
:
self
.
_decompressor
=
BZ2Decompressor
()
self
.
_buffer
=
self
.
_decompressor
.
decompress
(
rawblock
)
return
True
...
...
@@ -384,9 +399,15 @@ def decompress(data):
"""
if
len
(
data
)
==
0
:
return
b""
decomp
=
BZ2Decompressor
()
result
=
decomp
.
decompress
(
data
)
if
not
decomp
.
eof
:
raise
ValueError
(
"Compressed data ended before the "
"end-of-stream marker was reached"
)
return
result
result
=
b""
while
True
:
decomp
=
BZ2Decompressor
()
result
+=
decomp
.
decompress
(
data
)
if
not
decomp
.
eof
:
raise
ValueError
(
"Compressed data ended before the "
"end-of-stream marker was reached"
)
if
not
decomp
.
unused_data
:
return
result
# There is unused data left over. Proceed to next stream.
data
=
decomp
.
unused_data
Lib/test/test_bz2.py
View file @
55b43388
...
...
@@ -84,9 +84,9 @@ class BZ2FileTest(BaseTest):
else
:
return
self
.
DATA
def
createTempFile
(
self
,
crlf
=
False
):
def
createTempFile
(
self
,
crlf
=
False
,
streams
=
1
):
with
open
(
self
.
filename
,
"wb"
)
as
f
:
f
.
write
(
self
.
getData
(
crlf
))
f
.
write
(
self
.
getData
(
crlf
)
*
streams
)
def
testRead
(
self
):
# "Test BZ2File.read()"
...
...
@@ -95,6 +95,26 @@ class BZ2FileTest(BaseTest):
self
.
assertRaises
(
TypeError
,
bz2f
.
read
,
None
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
)
def
testReadMultiStream
(
self
):
# "Test BZ2File.read() with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
read
,
None
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
*
5
)
def
testReadMonkeyMultiStream
(
self
):
# "Test BZ2File.read() with a multi stream archive in which stream"
# "end is aligned with internal buffer size"
buffer_size
=
bz2
.
_BUFFER_SIZE
bz2
.
_BUFFER_SIZE
=
len
(
self
.
DATA
)
try
:
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
read
,
None
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
*
5
)
finally
:
bz2
.
_BUFFER_SIZE
=
buffer_size
def
testRead0
(
self
):
# "Test BZ2File.read(0)"
self
.
createTempFile
()
...
...
@@ -114,6 +134,18 @@ class BZ2FileTest(BaseTest):
text
+=
str
self
.
assertEqual
(
text
,
self
.
TEXT
)
def
testReadChunk10MultiStream
(
self
):
# "Test BZ2File.read() in chunks of 10 bytes with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
text
=
b''
while
1
:
str
=
bz2f
.
read
(
10
)
if
not
str
:
break
text
+=
str
self
.
assertEqual
(
text
,
self
.
TEXT
*
5
)
def
testRead100
(
self
):
# "Test BZ2File.read(100)"
self
.
createTempFile
()
...
...
@@ -151,6 +183,15 @@ class BZ2FileTest(BaseTest):
for
line
in
sio
.
readlines
():
self
.
assertEqual
(
bz2f
.
readline
(),
line
)
def
testReadLineMultiStream
(
self
):
# "Test BZ2File.readline() with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
readline
,
None
)
sio
=
BytesIO
(
self
.
TEXT
*
5
)
for
line
in
sio
.
readlines
():
self
.
assertEqual
(
bz2f
.
readline
(),
line
)
def
testReadLines
(
self
):
# "Test BZ2File.readlines()"
self
.
createTempFile
()
...
...
@@ -159,6 +200,14 @@ class BZ2FileTest(BaseTest):
sio
=
BytesIO
(
self
.
TEXT
)
self
.
assertEqual
(
bz2f
.
readlines
(),
sio
.
readlines
())
def
testReadLinesMultiStream
(
self
):
# "Test BZ2File.readlines() with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
readlines
,
None
)
sio
=
BytesIO
(
self
.
TEXT
*
5
)
self
.
assertEqual
(
bz2f
.
readlines
(),
sio
.
readlines
())
def
testIterator
(
self
):
# "Test iter(BZ2File)"
self
.
createTempFile
()
...
...
@@ -166,6 +215,13 @@ class BZ2FileTest(BaseTest):
sio
=
BytesIO
(
self
.
TEXT
)
self
.
assertEqual
(
list
(
iter
(
bz2f
)),
sio
.
readlines
())
def
testIteratorMultiStream
(
self
):
# "Test iter(BZ2File) with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
sio
=
BytesIO
(
self
.
TEXT
*
5
)
self
.
assertEqual
(
list
(
iter
(
bz2f
)),
sio
.
readlines
())
def
testClosedIteratorDeadlock
(
self
):
# "Test that iteration on a closed bz2file releases the lock."
# http://bugs.python.org/issue3309
...
...
@@ -217,6 +273,17 @@ class BZ2FileTest(BaseTest):
self
.
assertRaises
(
IOError
,
bz2f
.
write
,
b"a"
)
self
.
assertRaises
(
IOError
,
bz2f
.
writelines
,
[
b"a"
])
def
testAppend
(
self
):
# "Test BZ2File.write() in append mode"
with
BZ2File
(
self
.
filename
,
"w"
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
write
)
bz2f
.
write
(
self
.
TEXT
)
with
BZ2File
(
self
.
filename
,
"a"
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
write
)
bz2f
.
write
(
self
.
TEXT
)
with
open
(
self
.
filename
,
'rb'
)
as
f
:
self
.
assertEqual
(
self
.
decompress
(
f
.
read
()),
self
.
TEXT
*
2
)
def
testSeekForward
(
self
):
# "Test BZ2File.seek(150, 0)"
self
.
createTempFile
()
...
...
@@ -225,6 +292,14 @@ class BZ2FileTest(BaseTest):
bz2f
.
seek
(
150
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
[
150
:])
def
testSeekForwardMultiStream
(
self
):
# "Test BZ2File.seek(150, 0) across stream boundaries"
self
.
createTempFile
(
streams
=
2
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
self
.
assertRaises
(
TypeError
,
bz2f
.
seek
)
bz2f
.
seek
(
len
(
self
.
TEXT
)
+
150
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
[
150
:])
def
testSeekBackwards
(
self
):
# "Test BZ2File.seek(-150, 1)"
self
.
createTempFile
()
...
...
@@ -233,6 +308,16 @@ class BZ2FileTest(BaseTest):
bz2f
.
seek
(
-
150
,
1
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
[
500
-
150
:])
def
testSeekBackwardsMultiStream
(
self
):
# "Test BZ2File.seek(-150, 1) across stream boundaries"
self
.
createTempFile
(
streams
=
2
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
readto
=
len
(
self
.
TEXT
)
+
100
while
readto
>
0
:
readto
-=
len
(
bz2f
.
read
(
readto
))
bz2f
.
seek
(
-
150
,
1
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
[
100
-
150
:]
+
self
.
TEXT
)
def
testSeekBackwardsFromEnd
(
self
):
# "Test BZ2File.seek(-150, 2)"
self
.
createTempFile
()
...
...
@@ -240,6 +325,13 @@ class BZ2FileTest(BaseTest):
bz2f
.
seek
(
-
150
,
2
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
[
len
(
self
.
TEXT
)
-
150
:])
def
testSeekBackwardsFromEndMultiStream
(
self
):
# "Test BZ2File.seek(-1000, 2) across stream boundaries"
self
.
createTempFile
(
streams
=
2
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
bz2f
.
seek
(
-
1000
,
2
)
self
.
assertEqual
(
bz2f
.
read
(),
(
self
.
TEXT
*
2
)[
-
1000
:])
def
testSeekPostEnd
(
self
):
# "Test BZ2File.seek(150000)"
self
.
createTempFile
()
...
...
@@ -248,6 +340,14 @@ class BZ2FileTest(BaseTest):
self
.
assertEqual
(
bz2f
.
tell
(),
len
(
self
.
TEXT
))
self
.
assertEqual
(
bz2f
.
read
(),
b""
)
def
testSeekPostEndMultiStream
(
self
):
# "Test BZ2File.seek(150000) with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
bz2f
.
seek
(
150000
)
self
.
assertEqual
(
bz2f
.
tell
(),
len
(
self
.
TEXT
)
*
5
)
self
.
assertEqual
(
bz2f
.
read
(),
b""
)
def
testSeekPostEndTwice
(
self
):
# "Test BZ2File.seek(150000) twice"
self
.
createTempFile
()
...
...
@@ -257,6 +357,15 @@ class BZ2FileTest(BaseTest):
self
.
assertEqual
(
bz2f
.
tell
(),
len
(
self
.
TEXT
))
self
.
assertEqual
(
bz2f
.
read
(),
b""
)
def
testSeekPostEndTwiceMultiStream
(
self
):
# "Test BZ2File.seek(150000) twice with a multi stream archive"
self
.
createTempFile
(
streams
=
5
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
bz2f
.
seek
(
150000
)
bz2f
.
seek
(
150000
)
self
.
assertEqual
(
bz2f
.
tell
(),
len
(
self
.
TEXT
)
*
5
)
self
.
assertEqual
(
bz2f
.
read
(),
b""
)
def
testSeekPreStart
(
self
):
# "Test BZ2File.seek(-150, 0)"
self
.
createTempFile
()
...
...
@@ -265,6 +374,14 @@ class BZ2FileTest(BaseTest):
self
.
assertEqual
(
bz2f
.
tell
(),
0
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
)
def
testSeekPreStartMultiStream
(
self
):
# "Test BZ2File.seek(-150, 0) with a multi stream archive"
self
.
createTempFile
(
streams
=
2
)
with
BZ2File
(
self
.
filename
)
as
bz2f
:
bz2f
.
seek
(
-
150
)
self
.
assertEqual
(
bz2f
.
tell
(),
0
)
self
.
assertEqual
(
bz2f
.
read
(),
self
.
TEXT
*
2
)
def
testFileno
(
self
):
# "Test BZ2File.fileno()"
self
.
createTempFile
()
...
...
@@ -510,6 +627,11 @@ class FuncTest(BaseTest):
# "Test decompress() function with incomplete data"
self
.
assertRaises
(
ValueError
,
bz2
.
decompress
,
self
.
DATA
[:
-
10
])
def
testDecompressMultiStream
(
self
):
# "Test decompress() function for data with multiple streams"
text
=
bz2
.
decompress
(
self
.
DATA
*
5
)
self
.
assertEqual
(
text
,
self
.
TEXT
*
5
)
def
test_main
():
support
.
run_unittest
(
BZ2FileTest
,
...
...
Misc/NEWS
View file @
55b43388
...
...
@@ -161,6 +161,9 @@ Core and Builtins
Library
-------
- Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
Initial patch by Nir Aides.
- Issue #12175: BufferedReader.read(-1) now calls raw.readall() if available.
- Issue #12175: FileIO.readall() now only reads the file position and size
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment