Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
04248a8d
Commit
04248a8d
authored
Oct 12, 2010
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #3873: Speed up unpickling from file objects which have a peek()
method.
parent
b0182c8c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
25 deletions
+138
-25
Lib/test/pickletester.py
Lib/test/pickletester.py
+42
-1
Misc/NEWS
Misc/NEWS
+3
-0
Modules/_pickle.c
Modules/_pickle.c
+93
-24
No files found.
Lib/test/pickletester.py
View file @
04248a8d
...
...
@@ -30,6 +30,21 @@ def count_opcode(code, pickle):
n
+=
1
return
n
class
UnseekableIO
(
io
.
BytesIO
):
def
peek
(
self
,
*
args
):
raise
NotImplementedError
def
seekable
(
self
):
return
False
def
seek
(
self
,
*
args
):
raise
io
.
UnsupportedOperation
def
tell
(
self
):
raise
io
.
UnsupportedOperation
# We can't very well test the extension registry without putting known stuff
# in it, but we have to be careful to restore its original state. Code
# should do this:
...
...
@@ -1072,9 +1087,10 @@ class AbstractPickleTests(unittest.TestCase):
# Test the correctness of internal buffering routines when handling
# large data.
for
proto
in
protocols
:
data
=
(
1
,
b'x'
*
(
256
*
1024
)
)
data
=
(
1
,
min
,
b'xy'
*
(
30
*
1024
),
len
)
dumped
=
self
.
dumps
(
data
,
proto
)
loaded
=
self
.
loads
(
dumped
)
self
.
assertEqual
(
len
(
loaded
),
len
(
data
))
self
.
assertEqual
(
loaded
,
data
)
...
...
@@ -1373,6 +1389,31 @@ class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
f
.
seek
(
0
)
self
.
assertEqual
(
unpickler
.
load
(),
data2
)
def
_check_multiple_unpicklings
(
self
,
ioclass
):
for
proto
in
protocols
:
data1
=
[(
x
,
str
(
x
))
for
x
in
range
(
2000
)]
+
[
b"abcde"
,
len
]
f
=
ioclass
()
pickler
=
self
.
pickler_class
(
f
,
protocol
=
proto
)
pickler
.
dump
(
data1
)
pickled
=
f
.
getvalue
()
N
=
5
f
=
ioclass
(
pickled
*
N
)
unpickler
=
self
.
unpickler_class
(
f
)
for
i
in
range
(
N
):
if
f
.
seekable
():
pos
=
f
.
tell
()
self
.
assertEqual
(
unpickler
.
load
(),
data1
)
if
f
.
seekable
():
self
.
assertEqual
(
f
.
tell
(),
pos
+
len
(
pickled
))
self
.
assertRaises
(
EOFError
,
unpickler
.
load
)
def
test_multiple_unpicklings_seekable
(
self
):
self
.
_check_multiple_unpicklings
(
io
.
BytesIO
)
def
test_multiple_unpicklings_unseekable
(
self
):
self
.
_check_multiple_unpicklings
(
UnseekableIO
)
if
__name__
==
"__main__"
:
# Print some stuff that can be used to rewrite DATA{0,1,2}
...
...
Misc/NEWS
View file @
04248a8d
...
...
@@ -13,6 +13,9 @@ Core and Builtins
Library
-------
- Issue #3873: Speed up unpickling from file objects which have a peek()
method.
- Issue #10075: Add a session_stats() method to SSLContext objects.
- Issue #9948: Fixed problem of losing filename case information.
...
...
Modules/_pickle.c
View file @
04248a8d
...
...
@@ -101,6 +101,9 @@ enum {
/* Maximum size of the write buffer of Pickler when pickling to a
stream. This is ignored for in-memory pickling. */
MAX_WRITE_BUF_SIZE
=
64
*
1024
,
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH
=
8192
*
16
,
};
/* Exception classes for pickle. These should override the ones defined in
...
...
@@ -355,8 +358,10 @@ typedef struct UnpicklerObject {
char
*
input_line
;
Py_ssize_t
input_len
;
Py_ssize_t
next_read_idx
;
Py_ssize_t
prefetched_idx
;
/* index of first prefetched byte */
PyObject
*
read
;
/* read() method of the input stream. */
PyObject
*
readline
;
/* readline() method of the input stream. */
PyObject
*
peek
;
/* peek() method of the input stream, or NULL */
char
*
encoding
;
/* Name of the encoding to be used for
decoding strings pickled using Python
...
...
@@ -859,9 +864,28 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
self
->
input_buffer
=
self
->
buffer
.
buf
;
self
->
input_len
=
self
->
buffer
.
len
;
self
->
next_read_idx
=
0
;
self
->
prefetched_idx
=
self
->
input_len
;
return
self
->
input_len
;
}
static
int
_Unpickler_SkipConsumed
(
UnpicklerObject
*
self
)
{
Py_ssize_t
consumed
=
self
->
next_read_idx
-
self
->
prefetched_idx
;
if
(
consumed
>
0
)
{
PyObject
*
r
;
assert
(
self
->
peek
);
/* otherwise we did something wrong */
/* This makes an useless copy... */
r
=
PyObject_CallFunction
(
self
->
read
,
"n"
,
consumed
);
if
(
r
==
NULL
)
return
-
1
;
Py_DECREF
(
r
);
self
->
prefetched_idx
=
self
->
next_read_idx
;
}
return
0
;
}
static
const
Py_ssize_t
READ_WHOLE_LINE
=
-
1
;
/* If reading from a file, we need to only pull the bytes we need, since there
...
...
@@ -882,10 +906,12 @@ static Py_ssize_t
_Unpickler_ReadFromFile
(
UnpicklerObject
*
self
,
Py_ssize_t
n
)
{
PyObject
*
data
;
Py_ssize_t
read_size
;
Py_ssize_t
read_size
,
prefetched_size
=
0
;
assert
(
self
->
read
!=
NULL
);
assert
(
self
->
next_read_idx
==
0
);
if
(
_Unpickler_SkipConsumed
(
self
)
<
0
)
return
-
1
;
if
(
n
==
READ_WHOLE_LINE
)
data
=
PyObject_Call
(
self
->
readline
,
empty_tuple
,
NULL
);
...
...
@@ -895,13 +921,41 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
return
-
1
;
data
=
_Unpickler_FastCall
(
self
,
self
->
read
,
len
);
}
if
(
data
==
NULL
)
return
-
1
;
read_size
=
_Unpickler_SetStringInput
(
self
,
data
);
self
->
input_len
=
0
;
/* Prefetch some data without advancing the file pointer, if possible */
if
(
self
->
peek
)
{
PyObject
*
len
,
*
prefetched
;
len
=
PyLong_FromSsize_t
(
PREFETCH
);
if
(
len
==
NULL
)
{
Py_DECREF
(
data
);
return
-
1
;
}
prefetched
=
_Unpickler_FastCall
(
self
,
self
->
peek
,
len
);
if
(
prefetched
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_NotImplementedError
))
{
/* peek() is probably not supported by the given file object */
PyErr_Clear
();
Py_CLEAR
(
self
->
peek
);
}
else
{
Py_DECREF
(
data
);
return
-
1
;
}
}
else
{
assert
(
PyBytes_Check
(
prefetched
));
prefetched_size
=
PyBytes_GET_SIZE
(
prefetched
);
PyBytes_ConcatAndDel
(
&
data
,
prefetched
);
if
(
data
==
NULL
)
return
-
1
;
}
}
read_size
=
_Unpickler_SetStringInput
(
self
,
data
)
-
prefetched_size
;
Py_DECREF
(
data
);
self
->
prefetched_idx
=
read_size
;
return
read_size
;
}
...
...
@@ -921,30 +975,31 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
static
Py_ssize_t
_Unpickler_Read
(
UnpicklerObject
*
self
,
char
**
s
,
Py_ssize_t
n
)
{
Py_ssize_t
num_read
;
if
(
n
==
0
)
{
*
s
=
NULL
;
return
0
;
}
/* This condition will always be true if self->read. */
if
(
self
->
next_read_idx
+
n
>
self
->
input_len
)
{
if
(
self
->
read
)
{
Py_ssize_t
num_read
;
assert
(
self
->
next_read_idx
==
self
->
input_len
);
num_read
=
_Unpickler_ReadFromFile
(
self
,
n
);
if
(
n
<
0
)
return
-
1
;
if
(
num_read
==
n
)
{
*
s
=
self
->
input_buffer
;
return
num_read
;
}
}
if
(
self
->
next_read_idx
+
n
<=
self
->
input_len
)
{
*
s
=
self
->
input_buffer
+
self
->
next_read_idx
;
self
->
next_read_idx
+=
n
;
return
n
;
}
if
(
!
self
->
read
)
{
PyErr_Format
(
PyExc_EOFError
,
"Ran out of input"
);
return
-
1
;
}
num_read
=
_Unpickler_ReadFromFile
(
self
,
n
)
;
if
(
num_read
<
0
)
return
-
1
;
if
(
num_read
<
n
)
{
PyErr_Format
(
PyExc_EOFError
,
"Ran out of input"
);
return
-
1
;
}
assert
(
self
->
read
==
NULL
);
*
s
=
self
->
input_buffer
+
self
->
next_read_idx
;
self
->
next_read_idx
+=
n
;
*
s
=
self
->
input_buffer
;
self
->
next_read_idx
=
n
;
return
n
;
}
...
...
@@ -972,9 +1027,7 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
{
Py_ssize_t
i
,
num_read
;
/* This loop will never be entered if self->read is not NULL. */
for
(
i
=
self
->
next_read_idx
;
i
<
self
->
input_len
;
i
++
)
{
assert
(
self
->
read
==
NULL
);
if
(
self
->
input_buffer
[
i
]
==
'\n'
)
{
char
*
line_start
=
self
->
input_buffer
+
self
->
next_read_idx
;
num_read
=
i
-
self
->
next_read_idx
+
1
;
...
...
@@ -983,11 +1036,11 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
}
}
if
(
self
->
read
)
{
assert
(
self
->
next_read_idx
==
self
->
input_len
);
num_read
=
_Unpickler_ReadFromFile
(
self
,
READ_WHOLE_LINE
);
if
(
num_read
<
0
)
return
-
1
;
*
result
=
self
->
input_buffer
;
self
->
next_read_idx
=
num_read
;
return
num_read
;
}
...
...
@@ -1106,8 +1159,10 @@ _Unpickler_New(void)
self
->
input_line
=
NULL
;
self
->
input_len
=
0
;
self
->
next_read_idx
=
0
;
self
->
prefetched_idx
=
0
;
self
->
read
=
NULL
;
self
->
readline
=
NULL
;
self
->
peek
=
NULL
;
self
->
encoding
=
NULL
;
self
->
errors
=
NULL
;
self
->
marks
=
NULL
;
...
...
@@ -1124,6 +1179,13 @@ _Unpickler_New(void)
static
int
_Unpickler_SetInputStream
(
UnpicklerObject
*
self
,
PyObject
*
file
)
{
self
->
peek
=
PyObject_GetAttrString
(
file
,
"peek"
);
if
(
self
->
peek
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_AttributeError
))
PyErr_Clear
();
else
return
-
1
;
}
self
->
read
=
PyObject_GetAttrString
(
file
,
"read"
);
self
->
readline
=
PyObject_GetAttrString
(
file
,
"readline"
);
if
(
self
->
readline
==
NULL
||
self
->
read
==
NULL
)
{
...
...
@@ -1132,6 +1194,7 @@ _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
"file must have 'read' and 'readline' attributes"
);
Py_CLEAR
(
self
->
read
);
Py_CLEAR
(
self
->
readline
);
Py_CLEAR
(
self
->
peek
);
return
-
1
;
}
return
0
;
...
...
@@ -5207,6 +5270,9 @@ load(UnpicklerObject *self)
break
;
/* and we are done! */
}
if
(
_Unpickler_SkipConsumed
(
self
)
<
0
)
return
NULL
;
/* XXX: It is not clear what this is actually for. */
if
((
err
=
PyErr_Occurred
()))
{
if
(
err
==
PyExc_EOFError
)
{
...
...
@@ -5356,6 +5422,7 @@ Unpickler_dealloc(UnpicklerObject *self)
PyObject_GC_UnTrack
((
PyObject
*
)
self
);
Py_XDECREF
(
self
->
readline
);
Py_XDECREF
(
self
->
read
);
Py_XDECREF
(
self
->
peek
);
Py_XDECREF
(
self
->
stack
);
Py_XDECREF
(
self
->
pers_func
);
Py_XDECREF
(
self
->
arg
);
...
...
@@ -5378,6 +5445,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
Py_VISIT
(
self
->
readline
);
Py_VISIT
(
self
->
read
);
Py_VISIT
(
self
->
peek
);
Py_VISIT
(
self
->
stack
);
Py_VISIT
(
self
->
pers_func
);
Py_VISIT
(
self
->
arg
);
...
...
@@ -5389,6 +5457,7 @@ Unpickler_clear(UnpicklerObject *self)
{
Py_CLEAR
(
self
->
readline
);
Py_CLEAR
(
self
->
read
);
Py_CLEAR
(
self
->
peek
);
Py_CLEAR
(
self
->
stack
);
Py_CLEAR
(
self
->
pers_func
);
Py_CLEAR
(
self
->
arg
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment