Begin documenting protocol 2.

5a2d8f5e · Guido van Rossum · bb38e306 · 5a2d8f5e · 5a2d8f5e
Commit 5a2d8f5e authored Jan 27, 2003 by Guido van Rossum
Hide whitespace changes
Inline Side-by-side

Showing with 304 additions and 0 deletions

Lib/pickle.py Lib/pickle.py +16 -0

Lib/pickletools.py Lib/pickletools.py +288 -0

No files found.
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -127,6 +127,22 @@ BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding
 TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
 FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

+# Protocol 2 (not yet implemented); opcode docs are in pickletools.py
+
+NEWOBJ          = '\x81'  # build object via cls.__new__(cls, *args)
+PROTO           = '\x80'  # protocol version indicator; 1-byte arg
+EXT2            = '\x83'  # extension code; 2-byte arg
+EXT1            = '\x82'  # extension code; 1-byte arg
+TUPLE1          = '\x85'  # build 1-tuple from the stack top
+EXT4            = '\x84'  # extension code; 4-byte arg
+TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
+TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
+NEWFALSE        = '\x89'  # push False
+NEWTRUE         = '\x88'  # push True
+LONG2           = '\x8b'  # push long; size prefix is 2 bytes
+LONG1           = '\x8a'  # push long; size prefix is 1 byte
+LONG4           = '\x8c'  # push long; size prefix is 4 bytes
+

 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
 del x

--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -578,6 +578,123 @@ float8 = ArgumentDescriptor(
             (may not survive roundtrip pickling intact).
             """)

+# Protocol 2 formats
+
+def decode_long(data):
+    r"""Decode a long from a two's complement little-endian binary string.
+    >>> decode_long("\xff\x00")
+    255L
+    >>> decode_long("\xff\x7f")
+    32767L
+    >>> decode_long("\x00\xff")
+    -256L
+    >>> decode_long("\x00\x80")
+    -32768L
+    >>> 
+    """
+    x = 0L  # value accumulated so far
+    i = 0L  # bit offset of the next byte (grows by 8 per byte)
+    for c in data:
+        x |= long(ord(c)) << i  # earlier bytes are less significant
+        i += 8L
+    if i and (x & (1L << (i-1L))):  # non-empty and top bit set: negative
+        x -= 1L << i  # map the unsigned value back to its signed value
+    return x
+
+def read_long1(f):
+    r"""Read a long1 from f: a size read by read_uint1, then that many bytes.
+    >>> import StringIO
+    >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
+    255L
+    >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
+    32767L
+    >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
+    -256L
+    >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
+    -32768L
+    >>> 
+    """
+
+    n = read_uint1(f)  # size prefix: number of data bytes that follow
+    data = f.read(n)
+    if len(data) != n:  # stream ended before n bytes were available
+        raise ValueError("not enough data in stream to read long1")
+    return decode_long(data)
+
+long1 = ArgumentDescriptor(
+    name="long1",
+    n=TAKEN_FROM_ARGUMENT,
+    reader=read_long1,
+    doc="""A binary long, little-endian, using 1-byte size.
+
+    This first reads one byte as an unsigned size, then reads that
+    many bytes and interprets them as a little-endian long.
+    """)
+
+def read_long2(f):
+    r"""Read a long2 from f: a size read by read_uint2, then that many bytes.
+    >>> import StringIO
+    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
+    255L
+    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
+    32767L
+    >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
+    -256L
+    >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
+    -32768L
+    >>> 
+    """
+
+    n = read_uint2(f)  # size prefix: number of data bytes that follow
+    data = f.read(n)
+    if len(data) != n:  # stream ended before n bytes were available
+        raise ValueError("not enough data in stream to read long2")
+    return decode_long(data)
+
+long2 = ArgumentDescriptor(
+    name="long2",
+    n=TAKEN_FROM_ARGUMENT,
+    reader=read_long2,
+    doc="""A binary long, little-endian, using 2-byte size.
+
+    This first reads two bytes as an unsigned size, then reads that
+    many bytes and interprets them as a little-endian long.
+    """)
+
+def read_long4(f):
+    r"""Read a long4 from f: a size read by read_int4 (>= 0), then the bytes.
+    >>> import StringIO
+    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
+    255L
+    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
+    32767L
+    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
+    -256L
+    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
+    -32768L
+    >>> 
+    """
+
+    n = read_int4(f)  # size prefix is signed, so it must be range-checked
+    if n < 0:
+        raise ValueError("long4 byte count < 0: %d" % n)
+    data = f.read(n)
+    if len(data) != n:  # stream ended before n bytes were available
+        raise ValueError("not enough data in stream to read long4")
+    return decode_long(data)
+
+long4 = ArgumentDescriptor(
+    name="long4",
+    n=TAKEN_FROM_ARGUMENT,
+    reader=read_long4,
+    doc="""A binary representation of a long, little-endian.
+
+    This first reads four bytes as a signed size (but requires the
+    size to be >= 0), then reads that many bytes and interprets them
+    as a little-endian long.
+    """)
+
+
 ##############################################################################
 # Object descriptors.  The stack used by the pickle machine holds objects,
 # and in the stack_before and stack_after attributes of OpcodeInfo
@@ -627,6 +744,11 @@ pyinteger_or_bool = StackObject(
                        doc="A Python integer object (short or long), or "
                            "a Python bool.")

+pybool = StackObject(
+             name='bool',
+             obtype=(bool,),
+             doc="A Python bool object.")
+
 pyfloat = StackObject(
              name='float',
              obtype=float,
@@ -1436,6 +1558,172 @@ opcodes = [
      ID is passed to self.persistent_load(), and whatever object that
      returns is pushed on the stack.  See PERSID for more detail.
      """),
+
+    # Protocol 2 opcodes
+
+    I(name='PROTO',
+      code='\x80',
+      arg=uint1,
+      stack_before=[],
+      stack_after=[],
+      proto=2,
+      doc="""Protocol version indicator.
+
+      For protocol 2 and above, a pickle must start with this opcode.
+      The argument is the protocol version, an int in range(2, 256).
+      """),
+
+    I(name='NEWOBJ',
+      code='\x81',
+      arg=None,
+      stack_before=[anyobject, anyobject],
+      stack_after=[anyobject],
+      proto=2,
+      doc="""Build an object instance.
+
+      The stack before should be thought of as containing a class
+      object followed by an argument tuple (the tuple being the stack
+      top).  Call these cls and args.  They are popped off the stack,
+      and the value returned by cls.__new__(cls, *args) is pushed back
+      onto the stack.
+      """),
+
+    I(name='EXT1',
+      code='\x82',
+      arg=uint1,
+      stack_before=[],
+      stack_after=[anyobject],
+      proto=2,
+      doc="""Extension code.
+
+      This code and the similar EXT2 and EXT4 allow using a registry
+      of popular objects that are pickled by name, typically classes.
+      It is envisioned that through a global negotiation and
+      registration process, third parties can set up a mapping between
+      ints and object names.
+
+      In order to guarantee pickle interchangeability, the extension
+      code registry ought to be global, although a range of codes may
+      be reserved for private use.
+      """),
+
+    I(name='EXT2',
+      code='\x83',
+      arg=uint2,
+      stack_before=[],
+      stack_after=[anyobject],
+      proto=2,
+      doc="""Extension code.
+
+      See EXT1.
+      """),
+
+    I(name='EXT4',
+      code='\x84',
+      arg=int4,
+      stack_before=[],
+      stack_after=[anyobject],
+      proto=2,
+      doc="""Extension code.
+
+      See EXT1.
+      """),
+
+    I(name='TUPLE1',
+      code='\x85',
+      arg=None,
+      stack_before=[anyobject],
+      stack_after=[pytuple],
+      proto=2,
+      doc="""One-tuple.
+
+      This code pops one value off the stack and pushes a tuple of
+      length 1 whose one item is that value back onto it.  IOW:
+
+          stack[-1] = tuple(stack[-1:])
+      """),
+
+    I(name='TUPLE2',
+      code='\x86',
+      arg=None,
+      stack_before=[anyobject, anyobject],
+      stack_after=[pytuple],
+      proto=2,
+      doc="""Two-tuple.
+
+      This code pops two values off the stack and pushes a tuple
+      of length 2 whose items are those values back onto it.  IOW:
+
+          stack[-2:] = [tuple(stack[-2:])]
+      """),
+
+    I(name='TUPLE3',
+      code='\x87',
+      arg=None,
+      stack_before=[anyobject, anyobject, anyobject],
+      stack_after=[pytuple],
+      proto=2,
+      doc="""Three-tuple.
+
+      This code pops three values off the stack and pushes a tuple
+      of length 3 whose items are those values back onto it.  IOW:
+
+          stack[-3:] = [tuple(stack[-3:])]
+      """),
+
+    I(name='NEWTRUE',
+      code='\x88',
+      arg=None,
+      stack_before=[],
+      stack_after=[pybool],
+      proto=2,
+      doc="""True.
+
+      Push True onto the stack."""),
+
+    I(name='NEWFALSE',
+      code='\x89',
+      arg=None,
+      stack_before=[],
+      stack_after=[pybool],
+      proto=2,
+      doc="""False.
+
+      Push False onto the stack."""),
+
+    I(name="LONG1",
+      code='\x8a',
+      arg=long1,
+      stack_before=[],
+      stack_after=[pylong],
+      proto=2,
+      doc="""Long integer using one-byte length.
+
+      A more efficient encoding of a Python long; the long1 encoding
+      says it all."""),
+
+    I(name="LONG2",
+      code='\x8b',
+      arg=long2,
+      stack_before=[],
+      stack_after=[pylong],
+      proto=2,
+      doc="""Long integer using two-byte length.
+
+      A more efficient encoding of a Python long; the long2 encoding
+      says it all."""),
+
+    I(name="LONG4",
+      code='\x8c',
+      arg=long4,
+      stack_before=[],
+      stack_after=[pylong],
+      proto=2,
+      doc="""Long integer using four-byte length.
+
+      A more efficient encoding of a Python long; the long4 encoding
+      says it all."""),
+
 ]
 del I