Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
1211c9a9
Commit
1211c9a9
authored
Jan 20, 2018
by
Serhiy Storchaka
Committed by
GitHub
Jan 20, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-32503: Avoid creating too small frames in pickles. (#5127)
parent
bd5c7d23
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
62 additions
and
51 deletions
+62
-51
Lib/pickle.py
Lib/pickle.py
+7
-5
Lib/test/pickletester.py
Lib/test/pickletester.py
+45
-37
Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
...S.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
+1
-0
Modules/_pickle.c
Modules/_pickle.c
+9
-9
No files found.
Lib/pickle.py
View file @
1211c9a9
...
...
@@ -183,6 +183,7 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
class
_Framer
:
_FRAME_SIZE_MIN
=
4
_FRAME_SIZE_TARGET
=
64
*
1024
def
__init__
(
self
,
file_write
):
...
...
@@ -203,11 +204,12 @@ class _Framer:
if
f
.
tell
()
>=
self
.
_FRAME_SIZE_TARGET
or
force
:
data
=
f
.
getbuffer
()
write
=
self
.
file_write
# Issue a single call to the write method of the underlying
# file object for the frame opcode with the size of the
# frame. The concatenation is expected to be less expensive
# than issuing an additional call to write.
write
(
FRAME
+
pack
(
"<Q"
,
len
(
data
)))
if
len
(
data
)
>=
self
.
_FRAME_SIZE_MIN
:
# Issue a single call to the write method of the underlying
# file object for the frame opcode with the size of the
# frame. The concatenation is expected to be less expensive
# than issuing an additional call to write.
write
(
FRAME
+
pack
(
"<Q"
,
len
(
data
)))
# Issue a separate call to write to append the frame
# contents without concatenation to the above to avoid a
...
...
Lib/test/pickletester.py
View file @
1211c9a9
...
...
@@ -2037,6 +2037,7 @@ class AbstractPickleTests(unittest.TestCase):
# Exercise framing (proto >= 4) for significant workloads
FRAME_SIZE_MIN
=
4
FRAME_SIZE_TARGET
=
64
*
1024
def
check_frame_opcodes
(
self
,
pickled
):
...
...
@@ -2047,36 +2048,43 @@ class AbstractPickleTests(unittest.TestCase):
framed by default and are therefore considered a frame by themselves in
the following consistency check.
"""
last_arg
=
last_pos
=
last_frame_opcode_size
=
None
frameless_opcode_sizes
=
{
'BINBYTES'
:
5
,
'BINUNICODE'
:
5
,
'BINBYTES8'
:
9
,
'BINUNICODE8'
:
9
,
}
frame_end
=
frameless_start
=
None
frameless_opcodes
=
{
'BINBYTES'
,
'BINUNICODE'
,
'BINBYTES8'
,
'BINUNICODE8'
}
for
op
,
arg
,
pos
in
pickletools
.
genops
(
pickled
):
if
op
.
name
in
frameless_opcode_sizes
:
if
len
(
arg
)
>
self
.
FRAME_SIZE_TARGET
:
frame_opcode_size
=
frameless_opcode_sizes
[
op
.
name
]
arg
=
len
(
arg
)
else
:
continue
elif
op
.
name
==
'FRAME'
:
frame_opcode_size
=
9
else
:
continue
if
last_pos
is
not
None
:
# The previous frame's size should be equal to the number
# of bytes up to the current frame.
frame_size
=
pos
-
last_pos
-
last_frame_opcode_size
self
.
assertEqual
(
frame_size
,
last_arg
)
last_arg
,
last_pos
=
arg
,
pos
last_frame_opcode_size
=
frame_opcode_size
# The last frame's size should be equal to the number of bytes up
# to the pickle's end.
frame_size
=
len
(
pickled
)
-
last_pos
-
last_frame_opcode_size
self
.
assertEqual
(
frame_size
,
last_arg
)
if
frame_end
is
not
None
:
self
.
assertLessEqual
(
pos
,
frame_end
)
if
pos
==
frame_end
:
frame_end
=
None
if
frame_end
is
not
None
:
# framed
self
.
assertNotEqual
(
op
.
name
,
'FRAME'
)
if
op
.
name
in
frameless_opcodes
:
# Only short bytes and str objects should be written
# in a frame
self
.
assertLessEqual
(
len
(
arg
),
self
.
FRAME_SIZE_TARGET
)
else
:
# not framed
if
(
op
.
name
==
'FRAME'
or
(
op
.
name
in
frameless_opcodes
and
len
(
arg
)
>
self
.
FRAME_SIZE_TARGET
)):
# Frame or large bytes or str object
if
frameless_start
is
not
None
:
# Only short data should be written outside of a frame
self
.
assertLess
(
pos
-
frameless_start
,
self
.
FRAME_SIZE_MIN
)
frameless_start
=
None
elif
frameless_start
is
None
and
op
.
name
!=
'PROTO'
:
frameless_start
=
pos
if
op
.
name
==
'FRAME'
:
self
.
assertGreaterEqual
(
arg
,
self
.
FRAME_SIZE_MIN
)
frame_end
=
pos
+
9
+
arg
pos
=
len
(
pickled
)
if
frame_end
is
not
None
:
self
.
assertEqual
(
frame_end
,
pos
)
elif
frameless_start
is
not
None
:
self
.
assertLess
(
pos
-
frameless_start
,
self
.
FRAME_SIZE_MIN
)
def
test_framing_many_objects
(
self
):
obj
=
list
(
range
(
10
**
5
))
...
...
@@ -2095,7 +2103,8 @@ class AbstractPickleTests(unittest.TestCase):
def
test_framing_large_objects
(
self
):
N
=
1024
*
1024
obj
=
[
b'x'
*
N
,
b'y'
*
N
,
'z'
*
N
]
small_items
=
[[
i
]
for
i
in
range
(
10
)]
obj
=
[
b'x'
*
N
,
*
small_items
,
b'y'
*
N
,
'z'
*
N
]
for
proto
in
range
(
4
,
pickle
.
HIGHEST_PROTOCOL
+
1
):
for
fast
in
[
False
,
True
]:
with
self
.
subTest
(
proto
=
proto
,
fast
=
fast
):
...
...
@@ -2119,12 +2128,9 @@ class AbstractPickleTests(unittest.TestCase):
# Perform full equality check if the lengths match.
self
.
assertEqual
(
obj
,
unpickled
)
n_frames
=
count_opcode
(
pickle
.
FRAME
,
pickled
)
if
not
fast
:
# One frame per memoize for each large object.
self
.
assertGreaterEqual
(
n_frames
,
len
(
obj
))
else
:
# One frame at the beginning and one at the end.
self
.
assertGreaterEqual
(
n_frames
,
2
)
# A single frame for small objects between
# first two large objects.
self
.
assertEqual
(
n_frames
,
1
)
self
.
check_frame_opcodes
(
pickled
)
def
test_optional_frames
(
self
):
...
...
@@ -2152,7 +2158,9 @@ class AbstractPickleTests(unittest.TestCase):
frame_size
=
self
.
FRAME_SIZE_TARGET
num_frames
=
20
obj
=
[
bytes
([
i
])
*
frame_size
for
i
in
range
(
num_frames
)]
# Large byte objects (dict values) intermitted with small objects
# (dict keys)
obj
=
{
i
:
bytes
([
i
])
*
frame_size
for
i
in
range
(
num_frames
)}
for
proto
in
range
(
4
,
pickle
.
HIGHEST_PROTOCOL
+
1
):
pickled
=
self
.
dumps
(
obj
,
proto
)
...
...
Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
0 → 100644
View file @
1211c9a9
Pickling with protocol 4 no longer creates too small frames.
Modules/_pickle.c
View file @
1211c9a9
...
...
@@ -119,8 +119,8 @@ enum {
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH
=
8192
*
16
,
FRAME_SIZE_MIN
=
4
,
FRAME_SIZE_TARGET
=
64
*
1024
,
FRAME_HEADER_SIZE
=
9
};
...
...
@@ -949,13 +949,6 @@ _write_size64(char *out, size_t value)
}
}
static
void
_Pickler_WriteFrameHeader
(
PicklerObject
*
self
,
char
*
qdata
,
size_t
frame_len
)
{
qdata
[
0
]
=
FRAME
;
_write_size64
(
qdata
+
1
,
frame_len
);
}
static
int
_Pickler_CommitFrame
(
PicklerObject
*
self
)
{
...
...
@@ -966,7 +959,14 @@ _Pickler_CommitFrame(PicklerObject *self)
return
0
;
frame_len
=
self
->
output_len
-
self
->
frame_start
-
FRAME_HEADER_SIZE
;
qdata
=
PyBytes_AS_STRING
(
self
->
output_buffer
)
+
self
->
frame_start
;
_Pickler_WriteFrameHeader
(
self
,
qdata
,
frame_len
);
if
(
frame_len
>=
FRAME_SIZE_MIN
)
{
qdata
[
0
]
=
FRAME
;
_write_size64
(
qdata
+
1
,
frame_len
);
}
else
{
memmove
(
qdata
,
qdata
+
FRAME_HEADER_SIZE
,
frame_len
);
self
->
output_len
-=
FRAME_HEADER_SIZE
;
}
self
->
frame_start
=
-
1
;
return
0
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment