Merge heads.

8157459d · Alexandre Vassalotti · 1aca953a · 8db89ca5 · 8157459d · 8157459d
Commit 8157459d authored Apr 14, 2013 by Alexandre Vassalotti
Hide whitespace changes
Inline Side-by-side

Showing with 109 additions and 21 deletions

Lib/pickletools.py Lib/pickletools.py +106 -21

Misc/NEWS Misc/NEWS +3 -0

No files found.
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
 import codecs
 import pickle
 import re
+import sys

 __all__ = ['dis', 'genops', 'optimize']

@@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1

 # Represents the number of bytes consumed by a two-argument opcode where
 # the first argument gives the number of bytes in the second argument.
-TAKEN_FROM_ARGUMENT1 = -2   # num bytes is 1-byte unsigned int
-TAKEN_FROM_ARGUMENT4 = -3   # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT1  = -2   # num bytes is 1-byte unsigned int
+TAKEN_FROM_ARGUMENT4  = -3   # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT4U = -4   # num bytes is 4-byte unsigned little-endian int

 class ArgumentDescriptor(object):
    __slots__ = (
@@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
        assert isinstance(n, int) and (n >= 0 or
                                       n in (UP_TO_NEWLINE,
                                             TAKEN_FROM_ARGUMENT1,
-                                             TAKEN_FROM_ARGUMENT4))
+                                             TAKEN_FROM_ARGUMENT4,
+                                             TAKEN_FROM_ARGUMENT4U))
        self.n = n

        self.reader = reader
@@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
           doc="Four-byte signed integer, little-endian, 2's complement.")


+def read_uint4(f):
+    r"""
+    >>> import io
+    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
+    255
+    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
+    True
+    """
+
+    data = f.read(4)
+    if len(data) == 4:
+        return _unpack("<I", data)[0]
+    raise ValueError("not enough data in stream to read uint4")
+
+uint4 = ArgumentDescriptor(
+            name='uint4',
+            n=4,
+            reader=read_uint4,
+            doc="Four-byte unsigned integer, little-endian.")
+
+
 def read_stringnl(f, decode=True, stripquotes=True):
    r"""
    >>> import io
@@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
              """)


+def read_bytes1(f):
+    r"""
+    >>> import io
+    >>> read_bytes1(io.BytesIO(b"\x00"))
+    b''
+    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
+    b'abc'
+    """
+
+    n = read_uint1(f)
+    assert n >= 0
+    data = f.read(n)
+    if len(data) == n:
+        return data
+    raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
+                     (n, len(data)))
+
+bytes1 = ArgumentDescriptor(
+              name="bytes1",
+              n=TAKEN_FROM_ARGUMENT1,
+              reader=read_bytes1,
+              doc="""A counted bytes string.
+
+              The first argument is a 1-byte unsigned int giving the number
+              of bytes, and the second argument is that many bytes.
+              """)
+
+
+def read_bytes4(f):
+    r"""
+    >>> import io
+    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
+    b''
+    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
+    b'abc'
+    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
+    Traceback (most recent call last):
+    ...
+    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
+    """
+
+    n = read_uint4(f)
+    if n > sys.maxsize:
+        raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
+    data = f.read(n)
+    if len(data) == n:
+        return data
+    raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
+                     (n, len(data)))
+
+bytes4 = ArgumentDescriptor(
+              name="bytes4",
+              n=TAKEN_FROM_ARGUMENT4U,
+              reader=read_bytes4,
+              doc="""A counted bytes string.
+
+              The first argument is a 4-byte little-endian unsigned int giving
+              the number of bytes, and the second argument is that many bytes.
+              """)
+
+
 def read_unicodestringnl(f):
    r"""
    >>> import io
@@ -464,9 +549,9 @@ def read_unicodestring4(f):
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """

-    n = read_int4(f)
-    if n < 0:
-        raise ValueError("unicodestring4 byte count < 0: %d" % n)
+    n = read_uint4(f)
+    if n > sys.maxsize:
+        raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
    data = f.read(n)
    if len(data) == n:
        return str(data, 'utf-8', 'surrogatepass')
@@ -475,7 +560,7 @@ def read_unicodestring4(f):

 unicodestring4 = ArgumentDescriptor(
                    name="unicodestring4",
-                    n=TAKEN_FROM_ARGUMENT4,
+                    n=TAKEN_FROM_ARGUMENT4U,
                    reader=read_unicodestring4,
                    doc="""A counted Unicode string.

@@ -872,7 +957,7 @@ class OpcodeInfo(object):
            assert isinstance(x, StackObject)
        self.stack_after = stack_after

-        assert isinstance(proto, int) and 0 <= proto <= 3
+        assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
        self.proto = proto

        assert isinstance(doc, str)
@@ -1038,28 +1123,28 @@ opcodes = [

    I(name='BINBYTES',
      code='B',
-      arg=string4,
+      arg=bytes4,
      stack_before=[],
      stack_after=[pybytes],
      proto=3,
      doc="""Push a Python bytes object.

-      There are two arguments:  the first is a 4-byte little-endian signed int
-      giving the number of bytes in the string, and the second is that many
-      bytes, which are taken literally as the bytes content.
+      There are two arguments:  the first is a 4-byte little-endian unsigned int
+      giving the number of bytes, and the second is that many bytes, which are
+      taken literally as the bytes content.
      """),

    I(name='SHORT_BINBYTES',
      code='C',
-      arg=string1,
+      arg=bytes1,
      stack_before=[],
      stack_after=[pybytes],
      proto=3,
-      doc="""Push a Python string object.
+      doc="""Push a Python bytes object.

      There are two arguments:  the first is a 1-byte unsigned int giving
-      the number of bytes in the string, and the second is that many bytes,
-      which are taken literally as the string content.
+      the number of bytes, and the second is that many bytes, which are taken
+      literally as the bytes content.
      """),

    # Ways to spell None.
@@ -1118,7 +1203,7 @@ opcodes = [
      proto=1,
      doc="""Push a Python Unicode string object.

-      There are two arguments:  the first is a 4-byte little-endian signed int
+      There are two arguments:  the first is a 4-byte little-endian unsigned int
      giving the number of bytes in the string.  The second is that many
      bytes, and is the UTF-8 encoding of the Unicode string.
      """),
@@ -1422,13 +1507,13 @@ opcodes = [

    I(name='LONG_BINGET',
      code='j',
-      arg=int4,
+      arg=uint4,
      stack_before=[],
      stack_after=[anyobject],
      proto=1,
      doc="""Read an object from the memo and push it on the stack.

-      The index of the memo object to push is given by the 4-byte signed
+      The index of the memo object to push is given by the 4-byte unsigned
      little-endian integer following.
      """),

@@ -1459,14 +1544,14 @@ opcodes = [

    I(name='LONG_BINPUT',
      code='r',
-      arg=int4,
+      arg=uint4,
      stack_before=[],
      stack_after=[],
      proto=1,
      doc="""Store the stack top into the memo.  The stack is not popped.

      The index of the memo location to write into is given by the 4-byte
-      signed little-endian integer following.
+      unsigned little-endian integer following.
      """),

    # Access the extension registry (predefined objects).  Akin to the GET

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -58,6 +58,9 @@ Library
 - Issue #17526: fix an IndexError raised while passing code without filename to
  inspect.findsource().  Initial patch by Tyler Doyle.

+- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
+  integers where appropriate.  Initial patch by Serhiy Storchaka.
+
 IDLE
 ----