Commit d5ca509e authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #28199: Microoptimized dict resizing. Based on patch by Naoki Inada.

parent 15316431
......@@ -1195,41 +1195,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
}
/*
Internal routine used by dictresize() to insert an item which is
known to be absent from the dict. This routine also assumes that
the dict contains no deleted entries. Besides the performance benefit,
using insertdict() in dictresize() is dangerous (SF bug #1456209).
Note that no refcounts are changed by this routine; if needed, the caller
is responsible for incref'ing `key` and `value`.
Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
must set them correctly
Internal routine used by dictresize() to buid a hashtable of entries.
*/
static void
insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
PyObject *value)
build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
{
size_t i;
PyDictKeysObject *k = mp->ma_keys;
size_t mask = (size_t)DK_SIZE(k)-1;
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
PyDictKeyEntry *ep;
assert(k->dk_lookup != NULL);
assert(value != NULL);
assert(key != NULL);
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
i = hash & mask;
for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
size_t mask = (size_t)DK_SIZE(keys) - 1;
for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
Py_hash_t hash = ep->me_hash;
size_t i = hash & mask;
for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1);
}
ep = &ep0[k->dk_nentries];
assert(ep->me_value == NULL);
dk_set_index(k, i, k->dk_nentries);
k->dk_nentries++;
ep->me_key = key;
ep->me_hash = hash;
ep->me_value = value;
dk_set_index(keys, i, ix);
}
}
/*
......@@ -1245,10 +1225,10 @@ but can be resplit by make_keys_shared().
static int
dictresize(PyDictObject *mp, Py_ssize_t minused)
{
Py_ssize_t i, newsize;
Py_ssize_t newsize, numentries;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
PyDictKeyEntry *ep0;
PyDictKeyEntry *oldentries, *newentries;
/* Find the smallest table size > minused. */
for (newsize = PyDict_MINSIZE;
......@@ -1259,8 +1239,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
PyErr_NoMemory();
return -1;
}
oldkeys = mp->ma_keys;
oldvalues = mp->ma_values;
/* NOTE: Current odict checks mp->ma_keys to detect resize happen.
* So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
* TODO: Try reusing oldkeys when reimplement odict.
*/
/* Allocate a new table. */
mp->ma_keys = new_keys_object(newsize);
if (mp->ma_keys == NULL) {
......@@ -1269,42 +1255,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
}
if (oldkeys->dk_lookup == lookdict)
mp->ma_keys->dk_lookup = lookdict;
mp->ma_values = NULL;
ep0 = DK_ENTRIES(oldkeys);
/* Main loop below assumes we can transfer refcount to new keys
* and that value is stored in me_value.
* Increment ref-counts and copy values here to compensate
* This (resizing a split table) should be relatively rare */
numentries = mp->ma_used;
oldentries = DK_ENTRIES(oldkeys);
newentries = DK_ENTRIES(mp->ma_keys);
oldvalues = mp->ma_values;
if (oldvalues != NULL) {
for (i = 0; i < oldkeys->dk_nentries; i++) {
if (oldvalues[i] != NULL) {
Py_INCREF(ep0[i].me_key);
ep0[i].me_value = oldvalues[i];
}
}
}
/* Main loop */
for (i = 0; i < oldkeys->dk_nentries; i++) {
PyDictKeyEntry *ep = &ep0[i];
if (ep->me_value != NULL) {
insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
}
/* Convert split table into new combined table.
* We must incref keys; we can transfer values.
* Note that values of split table is always dense.
*/
for (Py_ssize_t i = 0; i < numentries; i++) {
assert(oldvalues[i] != NULL);
PyDictKeyEntry *ep = &oldentries[i];
PyObject *key = ep->me_key;
Py_INCREF(key);
newentries[i].me_key = key;
newentries[i].me_hash = ep->me_hash;
newentries[i].me_value = oldvalues[i];
}
mp->ma_keys->dk_usable -= mp->ma_used;
if (oldvalues != NULL) {
/* NULL out me_value slot in oldkeys, in case it was shared */
for (i = 0; i < oldkeys->dk_nentries; i++)
ep0[i].me_value = NULL;
DK_DECREF(oldkeys);
mp->ma_values = NULL;
if (oldvalues != empty_values) {
free_values(oldvalues);
}
}
else { // combined table.
if (oldkeys->dk_nentries == numentries) {
memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
}
else {
PyDictKeyEntry *ep = oldentries;
for (Py_ssize_t i = 0; i < numentries; i++) {
while (ep->me_value == NULL)
ep++;
newentries[i] = *ep++;
}
}
assert(oldkeys->dk_lookup != lookdict_split);
assert(oldkeys->dk_refcnt == 1);
if (oldkeys->dk_size == PyDict_MINSIZE &&
numfreekeys < PyDict_MAXFREELIST) {
DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
}
else {
DK_DEBUG_DECREF PyObject_FREE(oldkeys);
}
}
build_indices(mp->ma_keys, newentries, numentries);
mp->ma_keys->dk_usable -= numentries;
mp->ma_keys->dk_nentries = numentries;
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment