Commit 409b039b authored by Antoine Pitrou's avatar Antoine Pitrou

Issue #13521: dict.setdefault() now does only one lookup for the given key,...

Issue #13521: dict.setdefault() now does only one lookup for the given key, making it "atomic" for many purposes.
Patch by Filip Gruszczyński.
parent f590edeb
......@@ -299,6 +299,26 @@ class DictTest(unittest.TestCase):
x.fail = True
self.assertRaises(Exc, d.setdefault, x, [])
def test_setdefault_atomic(self):
# Issue #13521: setdefault() calls __hash__ and __eq__ only once.
class Hashed(object):
def __init__(self):
self.hash_count = 0
self.eq_count = 0
def __hash__(self):
self.hash_count += 1
return 42
def __eq__(self, other):
self.eq_count += 1
return id(self) == id(other)
hashed1 = Hashed()
y = {hashed1: 5}
hashed2 = Hashed()
y.setdefault(hashed2, [])
self.assertEqual(hashed1.hash_count, 1)
self.assertEqual(hashed2.hash_count, 1)
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
def test_popitem(self):
# dict.popitem()
for copymode in -1, +1:
......
......@@ -9,6 +9,9 @@ What's New in Python 2.7.3 release candidate 1?
Core and Builtins
-----------------
- Issue #13521: dict.setdefault() now does only one lookup for the given key,
making it "atomic" for many purposes. Patch by Filip Gruszczyński.
- Issue #13020: Fix a reference leak when allocating a structsequence object
fails. Patch by Suman Saha.
......
......@@ -502,27 +502,16 @@ _PyDict_MaybeUntrack(PyObject *op)
_PyObject_GC_UNTRACK(op);
}
/*
Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine.
Eats a reference to key and one to value.
Returns -1 if an error occurred, or 0 on success.
Internal routine to insert a new item into the table when you have entry object.
Used by insertdict.
*/
static int
insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
insertdict_by_entry(register PyDictObject *mp, PyObject *key, long hash,
PyDictEntry *ep, PyObject *value)
{
PyObject *old_value;
register PyDictEntry *ep;
typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, long);
assert(mp->ma_lookup != NULL);
ep = mp->ma_lookup(mp, key, hash);
if (ep == NULL) {
Py_DECREF(key);
Py_DECREF(value);
return -1;
}
MAINTAIN_TRACKING(mp, key, value);
if (ep->me_value != NULL) {
old_value = ep->me_value;
......@@ -545,6 +534,28 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
return 0;
}
/*
Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine.
Eats a reference to key and one to value.
Returns -1 if an error occurred, or 0 on success.
*/
static int
insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
{
register PyDictEntry *ep;
assert(mp->ma_lookup != NULL);
ep = mp->ma_lookup(mp, key, hash);
if (ep == NULL) {
Py_DECREF(key);
Py_DECREF(value);
return -1;
}
return insertdict_by_entry(mp, key, hash, ep, value);
}
/*
Internal routine used by dictresize() to insert an item which is
known to be absent from the dict. This routine also assumes that
......@@ -738,6 +749,45 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return ep->me_value;
}
static int
dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
long hash, PyDictEntry *ep, PyObject *value)
{
register PyDictObject *mp;
register Py_ssize_t n_used;
mp = (PyDictObject *)op;
assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */
n_used = mp->ma_used;
Py_INCREF(value);
Py_INCREF(key);
if (ep == NULL) {
if (insertdict(mp, key, hash, value) != 0)
return -1;
}
else {
if (insertdict_by_entry(mp, key, hash, ep, value) != 0)
return -1;
}
/* If we added a key, we can safely resize. Otherwise just return!
* If fill >= 2/3 size, adjust size. Normally, this doubles or
* quaduples the size, but it's also possible for the dict to shrink
* (if ma_fill is much larger than ma_used, meaning a lot of dict
* keys have been * deleted).
*
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
* the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
* dictionary if it's merely replacing the value for an existing key.
* This means that it's safe to loop over a dictionary with PyDict_Next()
......@@ -747,9 +797,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
int
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{
register PyDictObject *mp;
register long hash;
register Py_ssize_t n_used;
if (!PyDict_Check(op)) {
PyErr_BadInternalCall();
......@@ -757,7 +805,6 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
}
assert(key);
assert(value);
mp = (PyDictObject *)op;
if (PyString_CheckExact(key)) {
hash = ((PyStringObject *)key)->ob_shash;
if (hash == -1)
......@@ -768,29 +815,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
if (hash == -1)
return -1;
}
assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */
n_used = mp->ma_used;
Py_INCREF(value);
Py_INCREF(key);
if (insertdict(mp, key, hash, value) != 0)
return -1;
/* If we added a key, we can safely resize. Otherwise just return!
* If fill >= 2/3 size, adjust size. Normally, this doubles or
* quaduples the size, but it's also possible for the dict to shrink
* (if ma_fill is much larger than ma_used, meaning a lot of dict
* keys have been * deleted).
*
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
* the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
}
int
......@@ -1957,9 +1982,9 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
return NULL;
val = ep->me_value;
if (val == NULL) {
val = failobj;
if (PyDict_SetItem((PyObject*)mp, key, failobj))
val = NULL;
if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep,
failobj) == 0)
val = failobj;
}
Py_XINCREF(val);
return val;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment