Commit 05dadcfb authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can

produce more compact result and no longer produces invalid output if input
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
parent df938694
......@@ -2282,40 +2282,61 @@ def genops(pickle):
def optimize(p):
'Optimize a pickle string by removing unused PUT opcodes'
not_a_put = object()
gets = { not_a_put } # set of args used by a GET opcode
opcodes = [] # (startpos, stoppos, putid)
put = 'PUT'
get = 'GET'
oldids = set() # set of all PUT ids
newids = {} # set of ids used by a GET opcode
opcodes = [] # (op, idx) or (pos, end_pos)
proto = 0
protoheader = b''
for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
if 'PUT' in opcode.name:
opcodes.append((pos, end_pos, arg))
oldids.add(arg)
opcodes.append((put, arg))
elif opcode.name == 'MEMOIZE':
idx = len(oldids)
oldids.add(idx)
opcodes.append((put, idx))
elif 'FRAME' in opcode.name:
pass
else:
if 'GET' in opcode.name:
gets.add(arg)
elif opcode.name == 'PROTO':
assert pos == 0, pos
elif 'GET' in opcode.name:
if opcode.proto > proto:
proto = opcode.proto
newids[arg] = None
opcodes.append((get, arg))
elif opcode.name == 'PROTO':
if arg > proto:
proto = arg
opcodes.append((pos, end_pos, not_a_put))
prevpos, prevarg = pos, None
if pos == 0:
protoheader = p[pos: end_pos]
else:
opcodes.append((pos, end_pos))
else:
opcodes.append((pos, end_pos))
del oldids
# Copy the opcodes except for PUTS without a corresponding GET
out = io.BytesIO()
opcodes = iter(opcodes)
if proto >= 2:
# Write the PROTO header before any framing
start, stop, _ = next(opcodes)
out.write(p[start:stop])
buf = pickle._Framer(out.write)
# Write the PROTO header before any framing
out.write(protoheader)
pickler = pickle._Pickler(out, proto)
if proto >= 4:
buf.start_framing()
for start, stop, putid in opcodes:
if putid in gets:
buf.commit_frame()
buf.write(p[start:stop])
if proto >= 4:
buf.end_framing()
pickler.framer.start_framing()
idx = 0
for op, arg in opcodes:
if op is put:
if arg not in newids:
continue
data = pickler.put(idx)
newids[arg] = idx
idx += 1
elif op is get:
data = pickler.get(newids[arg])
else:
data = p[op:arg]
pickler.framer.commit_frame()
pickler.write(data)
pickler.framer.end_framing()
return out.getvalue()
##############################################################################
......
import struct
import pickle
import pickletools
from test import support
......@@ -15,6 +16,48 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
# Test relies on precise output of dumps()
test_pickle_to_2x = None
def test_optimize_long_binget(self):
data = [str(i) for i in range(257)]
data.append(data[-1])
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
pickled = pickle.dumps(data, proto)
unpickled = pickle.loads(pickled)
self.assertEqual(unpickled, data)
self.assertIs(unpickled[-1], unpickled[-2])
pickled2 = pickletools.optimize(pickled)
unpickled2 = pickle.loads(pickled2)
self.assertEqual(unpickled2, data)
self.assertIs(unpickled2[-1], unpickled2[-2])
self.assertNotIn(pickle.LONG_BINGET, pickled2)
self.assertNotIn(pickle.LONG_BINPUT, pickled2)
def test_optimize_binput_and_memoize(self):
pickled = (b'\x80\x04\x95\x15\x00\x00\x00\x00\x00\x00\x00'
b']\x94(\x8c\x04spamq\x01\x8c\x03ham\x94h\x02e.')
# 0: \x80 PROTO 4
# 2: \x95 FRAME 21
# 11: ] EMPTY_LIST
# 12: \x94 MEMOIZE
# 13: ( MARK
# 14: \x8c SHORT_BINUNICODE 'spam'
# 20: q BINPUT 1
# 22: \x8c SHORT_BINUNICODE 'ham'
# 27: \x94 MEMOIZE
# 28: h BINGET 2
# 30: e APPENDS (MARK at 13)
# 31: . STOP
self.assertIn(pickle.BINPUT, pickled)
unpickled = pickle.loads(pickled)
self.assertEqual(unpickled, ['spam', 'ham', 'ham'])
self.assertIs(unpickled[1], unpickled[2])
pickled2 = pickletools.optimize(pickled)
unpickled2 = pickle.loads(pickled2)
self.assertEqual(unpickled2, ['spam', 'ham', 'ham'])
self.assertIs(unpickled2[1], unpickled2[2])
self.assertNotIn(pickle.BINPUT, pickled2)
def test_main():
support.run_unittest(OptimizedPickleTests)
......
......@@ -41,6 +41,10 @@ Core and Builtins
Library
-------
- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
produce more compact result and no longer produces invalid output if input
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
- Issue #22095: Fixed HTTPConnection.set_tunnel with default port. The port
value in the host header was set to "None". Patch by Demian Brecht.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment