Commit 42f08ac1 authored by Tim Peters's avatar Tim Peters

Implemented batching for dicts in cPickle. This is after two failed

attempts to merge the C list-batch and dict-batch code -- they worked, but
it was a godawful mess to read.
parent e7b33db2
...@@ -612,7 +612,8 @@ class Pickler: ...@@ -612,7 +612,8 @@ class Pickler:
dispatch[ListType] = save_list dispatch[ListType] = save_list
# Keep in synch with cPickle's BATCHSIZE. # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
# out of synch, though.
_BATCHSIZE = 1000 _BATCHSIZE = 1000
def _batch_appends(self, items): def _batch_appends(self, items):
......
...@@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase): ...@@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase):
else: else:
self.failUnless(num_appends >= 2) self.failUnless(num_appends >= 2)
# XXX Temporary hack, so long as the C implementation of pickle protocol
# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests
# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
# XXX along with the references to it in test_pickle.py.
class TempAbstractPickleTests(unittest.TestCase):
def test_newobj_list_slots(self):
x = SlotList([1, 2, 3])
x.foo = 42
x.bar = "hello"
s = self.dumps(x, 2)
y = self.loads(s)
self.assertEqual(list(x), list(y))
self.assertEqual(x.__dict__, y.__dict__)
self.assertEqual(x.foo, y.foo)
self.assertEqual(x.bar, y.bar)
def test_dict_chunking(self): def test_dict_chunking(self):
n = 10 # too small to chunk n = 10 # too small to chunk
x = dict.fromkeys(range(n)) x = dict.fromkeys(range(n))
...@@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase): ...@@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase):
else: else:
self.failUnless(num_setitems >= 2) self.failUnless(num_setitems >= 2)
# XXX Temporary hack, so long as the C implementation of pickle protocol
# XXX 2 isn't ready. When it is, move the methods in TempAbstractPickleTests
# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
# XXX along with the references to it in test_pickle.py.
class TempAbstractPickleTests(unittest.TestCase):
def test_newobj_list_slots(self):
x = SlotList([1, 2, 3])
x.foo = 42
x.bar = "hello"
s = self.dumps(x, 2)
y = self.loads(s)
self.assertEqual(list(x), list(y))
self.assertEqual(x.__dict__, y.__dict__)
self.assertEqual(x.foo, y.foo)
self.assertEqual(x.bar, y.bar)
class MyInt(int): class MyInt(int):
sample = 1 sample = 1
......
...@@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation, ...@@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation,
#define FALSE "I00\n" #define FALSE "I00\n"
/* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements
* batch_{list, dict} pump out before doing APPENDS/SETITEMS. * batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
* break if this gets out of synch with pickle.py, but it's unclear that
* would help anything either.
*/ */
#define BATCHSIZE 1000 #define BATCHSIZE 1000
...@@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args) ...@@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args)
int len; int len;
PyObject *iter; PyObject *iter;
if (self->fast && !fast_save_enter(self, args)) if (self->fast && !fast_save_enter(self, args))
goto finally; goto finally;
...@@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args) ...@@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args)
} }
/* Pickle the (key, value) pairs produced by iter as SETITEM/SETITEMS
 * opcode sequences.
 *
 * iter is an iterator giving (key, value) pairs, and we batch up chunks of
 *     MARK key value ... key value SETITEMS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty dict, or dict-like object, for the SETITEMS to operate on.
 *
 * Protocol 0 has no SETITEMS, so each pair is followed by its own SETITEM.
 * For later protocols, up to BATCHSIZE pairs are pulled from the iterator
 * at a time; a batch holding a single pair is written with SETITEM rather
 * than MARK ... SETITEMS.
 *
 * Every item the iterator yields must be a 2-tuple; anything else raises
 * TypeError.  The reference to iter is only borrowed: the caller remains
 * responsible for releasing it.
 *
 * Returns 0 on success, <0 on error (with an exception set).
 *
 * This is very much like batch_list().  The difference between saving
 * elements directly, and picking apart two-tuples, is so long-winded at
 * the C level, though, that attempts to combine these routines were too
 * ugly to bear.
 */
static int
batch_dict(Picklerobject *self, PyObject *iter)
{
    PyObject *p;
    PyObject *slice[BATCHSIZE];
    int i, n;

    static char setitem = SETITEM;
    static char setitems = SETITEMS;

    assert(iter != NULL);

    if (self->proto == 0) {
        /* SETITEMS isn't available; do one at a time. */
        for (;;) {
            p = PyIter_Next(iter);
            if (p == NULL) {
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
                PyErr_SetString(PyExc_TypeError, "dict items "
                    "iterator must return 2-tuples");
                /* PyIter_Next() gave us a new reference; don't leak it. */
                Py_DECREF(p);
                return -1;
            }
            i = save(self, PyTuple_GET_ITEM(p, 0), 0);
            if (i >= 0)
                i = save(self, PyTuple_GET_ITEM(p, 1), 0);
            /* Done with the pair whether or not the saves succeeded. */
            Py_DECREF(p);
            if (i < 0)
                return -1;
            if (self->write_func(self, &setitem, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get next group of (no more than) BATCHSIZE elements. */
        for (n = 0; n < BATCHSIZE; ++n) {
            p = PyIter_Next(iter);
            if (p == NULL) {
                if (PyErr_Occurred())
                    goto BatchFailed;
                break;
            }
            if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
                PyErr_SetString(PyExc_TypeError, "dict items "
                    "iterator must return 2-tuples");
                /* p has not been stored in slice[] yet, so the
                 * BatchFailed cleanup would never see it; release it
                 * here.
                 */
                Py_DECREF(p);
                goto BatchFailed;
            }
            slice[n] = p;
        }

        if (n > 1) {
            /* Pump out MARK, slice[0:n], SETITEMS. */
            if (self->write_func(self, &MARKv, 1) < 0)
                goto BatchFailed;
            for (i = 0; i < n; ++i) {
                p = slice[i];
                if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
                    goto BatchFailed;
                if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
                    goto BatchFailed;
            }
            if (self->write_func(self, &setitems, 1) < 0)
                goto BatchFailed;
        }
        else if (n == 1) {
            /* A lone pair doesn't need MARK ... SETITEMS framing. */
            p = slice[0];
            if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
                goto BatchFailed;
            if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
                goto BatchFailed;
            if (self->write_func(self, &setitem, 1) < 0)
                goto BatchFailed;
        }

        /* Release the pairs held for this batch. */
        for (i = 0; i < n; ++i) {
            Py_DECREF(slice[i]);
        }
        /* A short batch means the iterator is exhausted. */
    } while (n == BATCHSIZE);
    return 0;

BatchFailed:
    /* slice[0:n] are the only references still owned at this point. */
    while (--n >= 0) {
        Py_DECREF(slice[n]);
    }
    return -1;
}
static int static int
save_dict(Picklerobject *self, PyObject *args) save_dict(Picklerobject *self, PyObject *args)
{ {
PyObject *key = 0, *value = 0; int res = -1;
int i, len, res = -1, using_setitems;
char s[3]; char s[3];
int len;
static char setitem = SETITEM, setitems = SETITEMS; PyObject *iter;
if (self->fast && !fast_save_enter(self, args)) if (self->fast && !fast_save_enter(self, args))
goto finally; goto finally;
/* Create an empty dict. */
if (self->bin) { if (self->bin) {
s[0] = EMPTY_DICT; s[0] = EMPTY_DICT;
len = 1; len = 1;
...@@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args) ...@@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args)
if (self->write_func(self, s, len) < 0) if (self->write_func(self, s, len) < 0)
goto finally; goto finally;
/* Get dict size, and bow out early if empty. */
if ((len = PyDict_Size(args)) < 0) if ((len = PyDict_Size(args)) < 0)
goto finally; goto finally;
...@@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args) ...@@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args)
goto finally; goto finally;
} }
if ((using_setitems = (self->bin && (PyDict_Size(args) > 1)))) /* Materialize the dict items. */
if (self->write_func(self, &MARKv, 1) < 0) iter = PyObject_CallMethod(args, "iteritems", "()");
goto finally; if (iter == NULL)
goto finally;
i = 0; res = batch_dict(self, iter);
while (PyDict_Next(args, &i, &key, &value)) { Py_DECREF(iter);
if (save(self, key, 0) < 0)
goto finally;
if (save(self, value, 0) < 0)
goto finally;
if (!using_setitems) {
if (self->write_func(self, &setitem, 1) < 0)
goto finally;
}
}
if (using_setitems) {
if (self->write_func(self, &setitems, 1) < 0)
goto finally;
}
res = 0;
finally: finally:
if (self->fast && !fast_save_leave(self, args)) if (self->fast && !fast_save_leave(self, args))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment