Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
8157459d
Commit
8157459d
authored
Apr 14, 2013
by
Alexandre Vassalotti
Browse files
Options
Browse Files
Download
Plain Diff
Merge heads.
parents
1aca953a
8db89ca5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
109 additions
and
21 deletions
+109
-21
Lib/pickletools.py
Lib/pickletools.py
+106
-21
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/pickletools.py
View file @
8157459d
...
...
@@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
import
codecs
import
pickle
import
re
import
sys
__all__
=
[
'dis'
,
'genops'
,
'optimize'
]
...
...
@@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1
# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT1
=
-
2
# num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4
=
-
3
# num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT1
=
-
2
# num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4
=
-
3
# num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U
=
-
4
# num bytes is 4-byte unsigned little-endian int
class
ArgumentDescriptor
(
object
):
__slots__
=
(
...
...
@@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
assert
isinstance
(
n
,
int
)
and
(
n
>=
0
or
n
in
(
UP_TO_NEWLINE
,
TAKEN_FROM_ARGUMENT1
,
TAKEN_FROM_ARGUMENT4
))
TAKEN_FROM_ARGUMENT4
,
TAKEN_FROM_ARGUMENT4U
))
self
.
n
=
n
self
.
reader
=
reader
...
...
@@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
doc
=
"Four-byte signed integer, little-endian, 2's complement."
)
def
read_uint4
(
f
):
r"""
>>> import io
>>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
255
>>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
True
"""
data
=
f
.
read
(
4
)
if
len
(
data
)
==
4
:
return
_unpack
(
"<I"
,
data
)[
0
]
raise
ValueError
(
"not enough data in stream to read uint4"
)
uint4
=
ArgumentDescriptor
(
name
=
'uint4'
,
n
=
4
,
reader
=
read_uint4
,
doc
=
"Four-byte unsigned integer, little-endian."
)
def
read_stringnl
(
f
,
decode
=
True
,
stripquotes
=
True
):
r"""
>>> import io
...
...
@@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
"""
)
def
read_bytes1
(
f
):
r"""
>>> import io
>>> read_bytes1(io.BytesIO(b"\x00"))
b''
>>> read_bytes1(io.BytesIO(b"\x03abcdef"))
b'abc'
"""
n
=
read_uint1
(
f
)
assert
n
>=
0
data
=
f
.
read
(
n
)
if
len
(
data
)
==
n
:
return
data
raise
ValueError
(
"expected %d bytes in a bytes1, but only %d remain"
%
(
n
,
len
(
data
)))
bytes1
=
ArgumentDescriptor
(
name
=
"bytes1"
,
n
=
TAKEN_FROM_ARGUMENT1
,
reader
=
read_bytes1
,
doc
=
"""A counted bytes string.
The first argument is a 1-byte unsigned int giving the number
of bytes, and the second argument is that many bytes.
"""
)
def
read_bytes4
(
f
):
r"""
>>> import io
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
b''
>>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
b'abc'
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
Traceback (most recent call last):
...
ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
"""
n
=
read_uint4
(
f
)
if
n
>
sys
.
maxsize
:
raise
ValueError
(
"bytes4 byte count > sys.maxsize: %d"
%
n
)
data
=
f
.
read
(
n
)
if
len
(
data
)
==
n
:
return
data
raise
ValueError
(
"expected %d bytes in a bytes4, but only %d remain"
%
(
n
,
len
(
data
)))
bytes4
=
ArgumentDescriptor
(
name
=
"bytes4"
,
n
=
TAKEN_FROM_ARGUMENT4U
,
reader
=
read_bytes4
,
doc
=
"""A counted bytes string.
The first argument is a 4-byte little-endian unsigned int giving
the number of bytes, and the second argument is that many bytes.
"""
)
def
read_unicodestringnl
(
f
):
r"""
>>> import io
...
...
@@ -464,9 +549,9 @@ def read_unicodestring4(f):
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
"""
n
=
read_int4
(
f
)
if
n
<
0
:
raise
ValueError
(
"unicodestring4 byte count
< 0
: %d"
%
n
)
n
=
read_
u
int4
(
f
)
if
n
>
sys
.
maxsize
:
raise
ValueError
(
"unicodestring4 byte count
> sys.maxsize
: %d"
%
n
)
data
=
f
.
read
(
n
)
if
len
(
data
)
==
n
:
return
str
(
data
,
'utf-8'
,
'surrogatepass'
)
...
...
@@ -475,7 +560,7 @@ def read_unicodestring4(f):
unicodestring4
=
ArgumentDescriptor
(
name
=
"unicodestring4"
,
n
=
TAKEN_FROM_ARGUMENT4
,
n
=
TAKEN_FROM_ARGUMENT4
U
,
reader
=
read_unicodestring4
,
doc
=
"""A counted Unicode string.
...
...
@@ -872,7 +957,7 @@ class OpcodeInfo(object):
assert
isinstance
(
x
,
StackObject
)
self
.
stack_after
=
stack_after
assert
isinstance
(
proto
,
int
)
and
0
<=
proto
<=
3
assert
isinstance
(
proto
,
int
)
and
0
<=
proto
<=
pickle
.
HIGHEST_PROTOCOL
self
.
proto
=
proto
assert
isinstance
(
doc
,
str
)
...
...
@@ -1038,28 +1123,28 @@ opcodes = [
I
(
name
=
'BINBYTES'
,
code
=
'B'
,
arg
=
string
4
,
arg
=
bytes
4
,
stack_before
=
[],
stack_after
=
[
pybytes
],
proto
=
3
,
doc
=
"""Push a Python bytes object.
There are two arguments: the first is a 4-byte little-endian signed int
giving the number of bytes
in the string, and the second is that many
bytes, which are
taken literally as the bytes content.
There are two arguments: the first is a 4-byte little-endian
un
signed int
giving the number of bytes
, and the second is that many bytes, which are
taken literally as the bytes content.
"""
),
I
(
name
=
'SHORT_BINBYTES'
,
code
=
'C'
,
arg
=
string
1
,
arg
=
bytes
1
,
stack_before
=
[],
stack_after
=
[
pybytes
],
proto
=
3
,
doc
=
"""Push a Python
string
object.
doc
=
"""Push a Python
bytes
object.
There are two arguments: the first is a 1-byte unsigned int giving
the number of bytes
in the string, and the second is that many bytes,
which are taken
literally as the string content.
the number of bytes
, and the second is that many bytes, which are taken
literally as the string content.
"""
),
# Ways to spell None.
...
...
@@ -1118,7 +1203,7 @@ opcodes = [
proto
=
1
,
doc
=
"""Push a Python Unicode string object.
There are two arguments: the first is a 4-byte little-endian signed int
There are two arguments: the first is a 4-byte little-endian
un
signed int
giving the number of bytes in the string. The second is that many
bytes, and is the UTF-8 encoding of the Unicode string.
"""
),
...
...
@@ -1422,13 +1507,13 @@ opcodes = [
I
(
name
=
'LONG_BINGET'
,
code
=
'j'
,
arg
=
int4
,
arg
=
u
int4
,
stack_before
=
[],
stack_after
=
[
anyobject
],
proto
=
1
,
doc
=
"""Read an object from the memo and push it on the stack.
The index of the memo object to push is given by the 4-byte signed
The index of the memo object to push is given by the 4-byte
un
signed
little-endian integer following.
"""
),
...
...
@@ -1459,14 +1544,14 @@ opcodes = [
I
(
name
=
'LONG_BINPUT'
,
code
=
'r'
,
arg
=
int4
,
arg
=
u
int4
,
stack_before
=
[],
stack_after
=
[],
proto
=
1
,
doc
=
"""Store the stack top into the memo. The stack is not popped.
The index of the memo location to write into is given by the 4-byte
signed little-endian integer following.
un
signed little-endian integer following.
"""
),
# Access the extension registry (predefined objects). Akin to the GET
...
...
Misc/NEWS
View file @
8157459d
...
...
@@ -58,6 +58,9 @@ Library
- Issue #17526: fix an IndexError raised while passing code without filename to
inspect.findsource(). Initial patch by Tyler Doyle.
- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
IDLE
----
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment