Issue #28199: Microoptimized dict resizing. Based on patch by Naoki Inada.

d5ca509e · Serhiy Storchaka · 15316431 · d5ca509e
Commit d5ca509e authored Oct 29, 2016 by Serhiy Storchaka
Show whitespace changes
Inline Side-by-side

Showing with 63 additions and 60 deletions

Objects/dictobject.c Objects/dictobject.c +63 -60

No files found.
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1195,41 +1195,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 }

 /*
-Internal routine used by dictresize() to insert an item which is
-known to be absent from the dict.  This routine also assumes that
-the dict contains no deleted entries.  Besides the performance benefit,
-using insertdict() in dictresize() is dangerous (SF bug #1456209).
-Note that no refcounts are changed by this routine; if needed, the caller
-is responsible for incref'ing `key` and `value`.
-Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
-must set them correctly
+Internal routine used by dictresize() to buid a hashtable of entries.
 */
 static void
-insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
-                 PyObject *value)
+build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
 {
-    size_t i;
-    PyDictKeysObject *k = mp->ma_keys;
-    size_t mask = (size_t)DK_SIZE(k)-1;
-    PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
-    PyDictKeyEntry *ep;
-
-    assert(k->dk_lookup != NULL);
-    assert(value != NULL);
-    assert(key != NULL);
-    assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
-    i = hash & mask;
-    for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
+    size_t mask = (size_t)DK_SIZE(keys) - 1;
+    for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
+        Py_hash_t hash = ep->me_hash;
+        size_t i = hash & mask;
+        for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
            perturb >>= PERTURB_SHIFT;
            i = mask & ((i << 2) + i + perturb + 1);
        }
-    ep = &ep0[k->dk_nentries];
-    assert(ep->me_value == NULL);
-    dk_set_index(k, i, k->dk_nentries);
-    k->dk_nentries++;
-    ep->me_key = key;
-    ep->me_hash = hash;
-    ep->me_value = value;
+        dk_set_index(keys, i, ix);
+    }
 }

 /*
@@ -1245,10 +1225,10 @@ but can be resplit by make_keys_shared().
 static int
 dictresize(PyDictObject *mp, Py_ssize_t minused)
 {
-    Py_ssize_t i, newsize;
+    Py_ssize_t newsize, numentries;
    PyDictKeysObject *oldkeys;
    PyObject **oldvalues;
-    PyDictKeyEntry *ep0;
+    PyDictKeyEntry *oldentries, *newentries;

    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
@@ -1259,8 +1239,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
        PyErr_NoMemory();
        return -1;
    }
+
    oldkeys = mp->ma_keys;
-    oldvalues = mp->ma_values;
+
+    /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
+     * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
+     * TODO: Try reusing oldkeys when reimplement odict.
+     */
+
    /* Allocate a new table. */
    mp->ma_keys = new_keys_object(newsize);
    if (mp->ma_keys == NULL) {
@@ -1269,42 +1255,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
    }
    if (oldkeys->dk_lookup == lookdict)
        mp->ma_keys->dk_lookup = lookdict;
-    mp->ma_values = NULL;
-    ep0 = DK_ENTRIES(oldkeys);
-    /* Main loop below assumes we can transfer refcount to new keys
-     * and that value is stored in me_value.
-     * Increment ref-counts and copy values here to compensate
-     * This (resizing a split table) should be relatively rare */
+
+    numentries = mp->ma_used;
+    oldentries = DK_ENTRIES(oldkeys);
+    newentries = DK_ENTRIES(mp->ma_keys);
+    oldvalues = mp->ma_values;
    if (oldvalues != NULL) {
-        for (i = 0; i < oldkeys->dk_nentries; i++) {
-            if (oldvalues[i] != NULL) {
-                Py_INCREF(ep0[i].me_key);
-                ep0[i].me_value = oldvalues[i];
-            }
-        }
-    }
-    /* Main loop */
-    for (i = 0; i < oldkeys->dk_nentries; i++) {
-        PyDictKeyEntry *ep = &ep0[i];
-        if (ep->me_value != NULL) {
-            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
-        }
+        /* Convert split table into new combined table.
+         * We must incref keys; we can transfer values.
+         * Note that values of split table is always dense.
+         */
+        for (Py_ssize_t i = 0; i < numentries; i++) {
+            assert(oldvalues[i] != NULL);
+            PyDictKeyEntry *ep = &oldentries[i];
+            PyObject *key = ep->me_key;
+            Py_INCREF(key);
+            newentries[i].me_key = key;
+            newentries[i].me_hash = ep->me_hash;
+            newentries[i].me_value = oldvalues[i];
        }
-    mp->ma_keys->dk_usable -= mp->ma_used;
-    if (oldvalues != NULL) {
-        /* NULL out me_value slot in oldkeys, in case it was shared */
-        for (i = 0; i < oldkeys->dk_nentries; i++)
-            ep0[i].me_value = NULL;
+
        DK_DECREF(oldkeys);
+        mp->ma_values = NULL;
        if (oldvalues != empty_values) {
            free_values(oldvalues);
        }
    }
+    else {  // combined table.
+        if (oldkeys->dk_nentries == numentries) {
+            memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
+        }
        else {
+            PyDictKeyEntry *ep = oldentries;
+            for (Py_ssize_t i = 0; i < numentries; i++) {
+                while (ep->me_value == NULL)
+                    ep++;
+                newentries[i] = *ep++;
+            }
+        }
+
        assert(oldkeys->dk_lookup != lookdict_split);
        assert(oldkeys->dk_refcnt == 1);
+        if (oldkeys->dk_size == PyDict_MINSIZE &&
+            numfreekeys < PyDict_MAXFREELIST) {
+            DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
+        }
+        else {
            DK_DEBUG_DECREF PyObject_FREE(oldkeys);
        }
+    }
+
+    build_indices(mp->ma_keys, newentries, numentries);
+    mp->ma_keys->dk_usable -= numentries;
+    mp->ma_keys->dk_nentries = numentries;
    return 0;
 }