Commit d1448f98 authored by Raymond Hettinger

* Add short-circuit code for in-place operations with self (such as
  s|=s, s&=s, s-=s, or s^=s).  Add related tests.

* Improve names for several variables and functions.

* Provide alternate table access functions (next, contains, add, and discard)
  that work with an entry argument instead of just a key.  This improves
  set-vs-set operations because we already have a hash value for each key
  and can avoid unnecessary calls to PyObject_Hash().  Provides a 5% to 20%
  speed-up for quick-hashing elements like strings and integers.  Provides
  much more substantial improvements for slow-hashing elements like tuples
  or objects defining a custom __hash__() function.

* Have difference operations resize() when 1/5 of the elements are dummies.
  Formerly, it was 1/6.  The new ratio triggers less frequently, and only
  in cases where it can resize more quickly and with greater benefit.  The
  right answer is probably either 1/4, 1/5, or 1/6.  Picked the middle value
  for an even trade-off between resize time and the space/time costs of
  dummy entries.
parent 118bc893
...@@ -370,6 +370,18 @@ class TestSet(TestJointOps): ...@@ -370,6 +370,18 @@ class TestSet(TestJointOps):
else: else:
self.assert_(c not in self.s) self.assert_(c not in self.s)
def test_inplace_on_self(self):
# Exercise every in-place set operator with the set itself as the operand.
t = self.s.copy()
# Union with itself: contents must still equal the original set.
t |= t
self.assertEqual(t, self.s)
# Intersection with itself: contents must still equal the original set.
t &= t
self.assertEqual(t, self.s)
# Difference with itself: result must equal a set built with no arguments.
t -= t
self.assertEqual(t, self.thetype())
# t was emptied by the previous step, so start again from a fresh copy.
t = self.s.copy()
# Symmetric difference with itself: result must equal a no-argument set.
t ^= t
self.assertEqual(t, self.thetype())
def test_weakref(self): def test_weakref(self):
s = self.thetype('gallahad') s = self.thetype('gallahad')
p = proxy(s) p = proxy(s)
......
/* set object implementation /* set object implementation
Written and maintained by Raymond D. Hettinger <python@rcn.com> Written and maintained by Raymond D. Hettinger <python@rcn.com>
Derived from Lib/sets.py and Objects/dictobject.c. Derived from Lib/sets.py and Objects/dictobject.c.
...@@ -226,7 +227,6 @@ set_insert_key(register PySetObject *so, PyObject *key, long hash) ...@@ -226,7 +227,6 @@ set_insert_key(register PySetObject *so, PyObject *key, long hash)
typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long); typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long);
assert(so->lookup != NULL); assert(so->lookup != NULL);
entry = so->lookup(so, key, hash); entry = so->lookup(so, key, hash);
if (entry->key == NULL) { if (entry->key == NULL) {
/* UNUSED */ /* UNUSED */
...@@ -336,18 +336,30 @@ set_table_resize(PySetObject *so, int minused) ...@@ -336,18 +336,30 @@ set_table_resize(PySetObject *so, int minused)
return 0; return 0;
} }
/* CAUTION: set_add_internal() must guarantee that it won't resize the table */ /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
/* Add the key held in `entry` to `so`, passing along the hash already
 * stored in the entry rather than computing it again.
 *
 * Returns 0 when no resize is attempted; otherwise returns whatever
 * set_table_resize() returns.
 */
static int
set_add_entry(register PySetObject *so, setentry *entry)
{
    register int used_before = so->used;

    assert(so->fill <= so->mask);  /* at least one empty slot */

    /* NOTE(review): presumably set_insert_key() takes over this new
     * reference to the key -- confirm against its definition. */
    Py_INCREF(entry->key);
    set_insert_key(so, entry->key, entry->hash);

    /* Resize only if the used count went up and fill has reached 2/3 of
     * the table size (mask + 1). */
    if (so->used > used_before && so->fill*3 >= (so->mask+1)*2)
        return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
    return 0;
}
static int static int
set_add_internal(register PySetObject *so, PyObject *key) set_add_key(register PySetObject *so, PyObject *key)
{ {
register long hash; register long hash;
register int n_used; register int n_used;
if (PyString_CheckExact(key)) { if (!PyString_CheckExact(key) ||
hash = ((PyStringObject *)key)->ob_shash; (hash = ((PyStringObject *) key)->ob_shash) == -1) {
if (hash == -1)
hash = PyObject_Hash(key);
} else {
hash = PyObject_Hash(key); hash = PyObject_Hash(key);
if (hash == -1) if (hash == -1)
return -1; return -1;
...@@ -365,7 +377,23 @@ set_add_internal(register PySetObject *so, PyObject *key) ...@@ -365,7 +377,23 @@ set_add_internal(register PySetObject *so, PyObject *key)
#define DISCARD_FOUND 1 #define DISCARD_FOUND 1
static int static int
set_discard_internal(PySetObject *so, PyObject *key) set_discard_entry(PySetObject *so, setentry *oldentry)
{ register setentry *entry;
PyObject *old_key;
entry = (so->lookup)(so, oldentry->key, oldentry->hash);
if (entry->key == NULL || entry->key == dummy)
return DISCARD_NOTFOUND;
old_key = entry->key;
Py_INCREF(dummy);
entry->key = dummy;
so->used--;
Py_DECREF(old_key);
return DISCARD_FOUND;
}
static int
set_discard_key(PySetObject *so, PyObject *key)
{ {
register long hash; register long hash;
register setentry *entry; register setentry *entry;
...@@ -457,39 +485,39 @@ set_clear_internal(PySetObject *so) ...@@ -457,39 +485,39 @@ set_clear_internal(PySetObject *so)
* Iterate over a set table. Use like so: * Iterate over a set table. Use like so:
* *
* int pos; * int pos;
* PyObject *key; * setentry *entry;
* pos = 0; # important! pos should not otherwise be changed by you * pos = 0; # important! pos should not otherwise be changed by you
* while (set_next_internal(yourset, &pos, &key)) { * while (set_next(yourset, &pos, &entry)) {
* Refer to borrowed reference in key. * Refer to borrowed reference in entry->key.
* } * }
* *
* CAUTION: In general, it isn't safe to use set_next_internal in a loop that * CAUTION: In general, it isn't safe to use set_next in a loop that
* mutates the table. * mutates the table.
*/ */
static int static int
set_next_internal(PySetObject *so, int *pos, PyObject **key) set_next(PySetObject *so, int *pos_ptr, setentry **entry_ptr)
{ {
register int i, mask; register int i, mask;
register setentry *entry; register setentry *table;
assert (PyAnySet_Check(so)); assert (PyAnySet_Check(so));
i = *pos; i = *pos_ptr;
if (i < 0) if (i < 0)
return 0; return 0;
entry = so->table; table = so->table;
mask = so->mask; mask = so->mask;
while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
i++; i++;
*pos = i+1; *pos_ptr = i+1;
if (i > mask) if (i > mask)
return 0; return 0;
if (key) if (table[i].key)
*key = entry[i].key; *entry_ptr = &table[i];
return 1; return 1;
} }
static int static int
set_merge_internal(PySetObject *so, PyObject *otherset) set_merge(PySetObject *so, PyObject *otherset)
{ {
PySetObject *other; PySetObject *other;
register int i; register int i;
...@@ -525,7 +553,7 @@ set_merge_internal(PySetObject *so, PyObject *otherset) ...@@ -525,7 +553,7 @@ set_merge_internal(PySetObject *so, PyObject *otherset)
} }
static int static int
set_contains_internal(PySetObject *so, PyObject *key) set_contains_key(PySetObject *so, PyObject *key)
{ {
long hash; long hash;
...@@ -539,6 +567,15 @@ set_contains_internal(PySetObject *so, PyObject *key) ...@@ -539,6 +567,15 @@ set_contains_internal(PySetObject *so, PyObject *key)
return key != NULL && key != dummy; return key != NULL && key != dummy;
} }
/* Report whether the key held in `entry` is present in `so`.
 *
 * The lookup uses the hash stored in the entry.  Returns 1 when the slot
 * found by the lookup holds a real key, and 0 when that slot is unused
 * (NULL) or holds the dummy marker.
 */
static int
set_contains_entry(PySetObject *so, setentry *entry)
{
    setentry *slot = (so->lookup)(so, entry->key, entry->hash);
    PyObject *found = slot->key;

    if (found == NULL || found == dummy)
        return 0;
    return 1;
}
/***** Set iterator type ***********************************************/ /***** Set iterator type ***********************************************/
static PyTypeObject PySetIter_Type; /* Forward */ static PyTypeObject PySetIter_Type; /* Forward */
...@@ -667,13 +704,13 @@ set_update_internal(PySetObject *so, PyObject *other) ...@@ -667,13 +704,13 @@ set_update_internal(PySetObject *so, PyObject *other)
PyObject *key, *it; PyObject *key, *it;
if (PyAnySet_Check(other)) if (PyAnySet_Check(other))
return set_merge_internal(so, other); return set_merge(so, other);
if (PyDict_Check(other)) { if (PyDict_Check(other)) {
PyObject *key, *value; PyObject *key, *value;
int pos = 0; int pos = 0;
while (PyDict_Next(other, &pos, &key, &value)) { while (PyDict_Next(other, &pos, &key, &value)) {
if (set_add_internal(so, key) == -1) if (set_add_key(so, key) == -1)
return -1; return -1;
} }
return 0; return 0;
...@@ -684,7 +721,7 @@ set_update_internal(PySetObject *so, PyObject *other) ...@@ -684,7 +721,7 @@ set_update_internal(PySetObject *so, PyObject *other)
return -1; return -1;
while ((key = PyIter_Next(it)) != NULL) { while ((key = PyIter_Next(it)) != NULL) {
if (set_add_internal(so, key) == -1) { if (set_add_key(so, key) == -1) {
Py_DECREF(it); Py_DECREF(it);
Py_DECREF(key); Py_DECREF(key);
return -1; return -1;
...@@ -833,10 +870,10 @@ static int ...@@ -833,10 +870,10 @@ static int
set_traverse(PySetObject *so, visitproc visit, void *arg) set_traverse(PySetObject *so, visitproc visit, void *arg)
{ {
int pos = 0; int pos = 0;
PyObject *key; setentry *entry;
while (set_next_internal(so, &pos, &key)) while (set_next(so, &pos, &entry))
Py_VISIT(key); Py_VISIT(entry->key);
return 0; return 0;
} }
...@@ -897,14 +934,14 @@ set_contains(PySetObject *so, PyObject *key) ...@@ -897,14 +934,14 @@ set_contains(PySetObject *so, PyObject *key)
PyObject *tmpkey; PyObject *tmpkey;
int result; int result;
result = set_contains_internal(so, key); result = set_contains_key(so, key);
if (result == -1 && PyAnySet_Check(key)) { if (result == -1 && PyAnySet_Check(key)) {
PyErr_Clear(); PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, NULL); tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
if (tmpkey == NULL) if (tmpkey == NULL)
return -1; return -1;
set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key); set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
result = set_contains_internal(so, tmpkey); result = set_contains_key(so, tmpkey);
set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key); set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
Py_DECREF(tmpkey); Py_DECREF(tmpkey);
} }
...@@ -942,6 +979,15 @@ frozenset_copy(PySetObject *so) ...@@ -942,6 +979,15 @@ frozenset_copy(PySetObject *so)
PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set."); PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
/* Wrapper around set_clear_internal(): runs the helper on `so` and then
 * returns None.  Any value the helper returns is discarded here.
 */
static PyObject *
set_clear(PySetObject *so)
{
set_clear_internal(so);
Py_RETURN_NONE;
}
/* Docstring text bound to the name clear_doc. */
PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
static PyObject * static PyObject *
set_union(PySetObject *so, PyObject *other) set_union(PySetObject *so, PyObject *other)
{ {
...@@ -991,6 +1037,11 @@ set_intersection(PySetObject *so, PyObject *other) ...@@ -991,6 +1037,11 @@ set_intersection(PySetObject *so, PyObject *other)
PySetObject *result; PySetObject *result;
PyObject *key, *it, *tmp; PyObject *key, *it, *tmp;
if ((PyObject *)so == other) {
Py_INCREF(other);
return other;
}
result = (PySetObject *)make_new_set(so->ob_type, NULL); result = (PySetObject *)make_new_set(so->ob_type, NULL);
if (result == NULL) if (result == NULL)
return NULL; return NULL;
...@@ -1001,11 +1052,12 @@ set_intersection(PySetObject *so, PyObject *other) ...@@ -1001,11 +1052,12 @@ set_intersection(PySetObject *so, PyObject *other)
other = tmp; other = tmp;
} }
if (PyAnySet_Check(other)) { if (PyAnySet_Check(other)) {
int pos = 0; int pos = 0;
while (set_next_internal((PySetObject *)other, &pos, &key)) { setentry *entry;
if (set_contains_internal(so, key)) { while (set_next((PySetObject *)other, &pos, &entry)) {
if (set_add_internal(result, key) == -1) { if (set_contains_entry(so, entry)) {
if (set_add_entry(result, entry) == -1) {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
} }
...@@ -1021,8 +1073,8 @@ set_intersection(PySetObject *so, PyObject *other) ...@@ -1021,8 +1073,8 @@ set_intersection(PySetObject *so, PyObject *other)
} }
while ((key = PyIter_Next(it)) != NULL) { while ((key = PyIter_Next(it)) != NULL) {
if (set_contains_internal(so, key)) { if (set_contains_key(so, key)) {
if (set_add_internal(result, key) == -1) { if (set_add_key(result, key) == -1) {
Py_DECREF(it); Py_DECREF(it);
Py_DECREF(result); Py_DECREF(result);
Py_DECREF(key); Py_DECREF(key);
...@@ -1087,32 +1139,48 @@ set_iand(PySetObject *so, PyObject *other) ...@@ -1087,32 +1139,48 @@ set_iand(PySetObject *so, PyObject *other)
return (PyObject *)so; return (PyObject *)so;
} }
static PyObject * int
set_difference_update(PySetObject *so, PyObject *other) set_difference_update_internal(PySetObject *so, PyObject *other)
{ {
PyObject *key, *it; if ((PyObject *)so == other)
return set_clear_internal(so);
it = PyObject_GetIter(other); if (PyAnySet_Check(other)) {
if (it == NULL) setentry *entry;
return NULL; int pos = 0;
while ((key = PyIter_Next(it)) != NULL) { while (set_next((PySetObject *)other, &pos, &entry))
if (set_discard_internal(so, key) == -1) { set_discard_entry(so, entry);
Py_DECREF(it); } else {
PyObject *key, *it;
it = PyObject_GetIter(other);
if (it == NULL)
return -1;
while ((key = PyIter_Next(it)) != NULL) {
if (set_discard_key(so, key) == -1) {
Py_DECREF(it);
Py_DECREF(key);
return -1;
}
Py_DECREF(key); Py_DECREF(key);
return NULL;
} }
Py_DECREF(key); Py_DECREF(it);
if (PyErr_Occurred())
return -1;
} }
Py_DECREF(it); /* If more than 1/5 are dummies, then resize them away. */
if (PyErr_Occurred()) if ((so->fill - so->used) * 5 < so->mask)
return NULL; return 0;
/* If more than 1/6 are dummies, then resize them away. */ return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
if ((so->fill - so->used) * 6 < so->mask) }
static PyObject *
set_difference_update(PySetObject *so, PyObject *other)
{
if (set_difference_update_internal(so, other) != -1)
Py_RETURN_NONE; Py_RETURN_NONE;
if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1) return NULL;
return NULL;
Py_RETURN_NONE;
} }
PyDoc_STRVAR(difference_update_doc, PyDoc_STRVAR(difference_update_doc,
...@@ -1121,18 +1189,16 @@ PyDoc_STRVAR(difference_update_doc, ...@@ -1121,18 +1189,16 @@ PyDoc_STRVAR(difference_update_doc,
static PyObject * static PyObject *
set_difference(PySetObject *so, PyObject *other) set_difference(PySetObject *so, PyObject *other)
{ {
PyObject *tmp, *key, *result; PyObject *result;
setentry *entry;
int pos = 0; int pos = 0;
if (!PyAnySet_Check(other) && !PyDict_Check(other)) { if (!PyAnySet_Check(other) && !PyDict_Check(other)) {
result = set_copy(so); result = set_copy(so);
if (result == NULL) if (result == NULL)
return NULL;
if (set_difference_update_internal((PySetObject *)result, other) != -1)
return result; return result;
tmp = set_difference_update((PySetObject *)result, other);
if (tmp != NULL) {
Py_DECREF(tmp);
return result;
}
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
} }
...@@ -1142,18 +1208,21 @@ set_difference(PySetObject *so, PyObject *other) ...@@ -1142,18 +1208,21 @@ set_difference(PySetObject *so, PyObject *other)
return NULL; return NULL;
if (PyDict_Check(other)) { if (PyDict_Check(other)) {
while (set_next_internal(so, &pos, &key)) { while (set_next(so, &pos, &entry)) {
if (!PyDict_Contains(other, key)) { setentry entrycopy;
if (set_add_internal((PySetObject *)result, key) == -1) entrycopy.hash = entry->hash;
entrycopy.key = entry->key;
if (!PyDict_Contains(other, entry->key)) {
if (set_add_entry((PySetObject *)result, &entrycopy) == -1)
return NULL; return NULL;
} }
} }
return result; return result;
} }
while (set_next_internal(so, &pos, &key)) { while (set_next(so, &pos, &entry)) {
if (!set_contains_internal((PySetObject *)other, key)) { if (!set_contains_entry((PySetObject *)other, entry)) {
if (set_add_internal((PySetObject *)result, key) == -1) if (set_add_entry((PySetObject *)result, entry) == -1)
return NULL; return NULL;
} }
} }
...@@ -1197,16 +1266,20 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) ...@@ -1197,16 +1266,20 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
PySetObject *otherset; PySetObject *otherset;
PyObject *key; PyObject *key;
int pos = 0; int pos = 0;
setentry *entry;
if ((PyObject *)so == other)
return set_clear(so);
if (PyDict_Check(other)) { if (PyDict_Check(other)) {
PyObject *value; PyObject *value;
int rv; int rv;
while (PyDict_Next(other, &pos, &key, &value)) { while (PyDict_Next(other, &pos, &key, &value)) {
rv = set_discard_internal(so, key); rv = set_discard_key(so, key);
if (rv == -1) if (rv == -1)
return NULL; return NULL;
if (rv == DISCARD_NOTFOUND) { if (rv == DISCARD_NOTFOUND) {
if (set_add_internal(so, key) == -1) if (set_add_key(so, key) == -1)
return NULL; return NULL;
} }
} }
...@@ -1222,14 +1295,14 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) ...@@ -1222,14 +1295,14 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
return NULL; return NULL;
} }
while (set_next_internal(otherset, &pos, &key)) { while (set_next(otherset, &pos, &entry)) {
int rv = set_discard_internal(so, key); int rv = set_discard_entry(so, entry);
if (rv == -1) { if (rv == -1) {
Py_XDECREF(otherset); Py_XDECREF(otherset);
return NULL; return NULL;
} }
if (rv == DISCARD_NOTFOUND) { if (rv == DISCARD_NOTFOUND) {
if (set_add_internal(so, key) == -1) { if (set_add_entry(so, entry) == -1) {
Py_XDECREF(otherset); Py_XDECREF(otherset);
return NULL; return NULL;
} }
...@@ -1312,7 +1385,7 @@ set_issubset(PySetObject *so, PyObject *other) ...@@ -1312,7 +1385,7 @@ set_issubset(PySetObject *so, PyObject *other)
for (i=so->used ; i ; entry++, i--) { for (i=so->used ; i ; entry++, i--) {
while (entry->key == NULL || entry->key==dummy) while (entry->key == NULL || entry->key==dummy)
entry++; entry++;
if (!set_contains_internal((PySetObject *)other, entry->key)) if (!set_contains_entry((PySetObject *)other, entry))
Py_RETURN_FALSE; Py_RETURN_FALSE;
} }
Py_RETURN_TRUE; Py_RETURN_TRUE;
...@@ -1448,35 +1521,26 @@ set_repr(PySetObject *so) ...@@ -1448,35 +1521,26 @@ set_repr(PySetObject *so)
static int static int
set_tp_print(PySetObject *so, FILE *fp, int flags) set_tp_print(PySetObject *so, FILE *fp, int flags)
{ {
PyObject *key; setentry *entry;
int pos=0; int pos=0;
char *emit = ""; /* No separator emitted on first pass */ char *emit = ""; /* No separator emitted on first pass */
char *separator = ", "; char *separator = ", ";
fprintf(fp, "%s([", so->ob_type->tp_name); fprintf(fp, "%s([", so->ob_type->tp_name);
while (set_next_internal(so, &pos, &key)) { while (set_next(so, &pos, &entry)) {
fputs(emit, fp); fputs(emit, fp);
emit = separator; emit = separator;
if (PyObject_Print(key, fp, 0) != 0) if (PyObject_Print(entry->key, fp, 0) != 0)
return -1; return -1;
} }
fputs("])", fp); fputs("])", fp);
return 0; return 0;
} }
/* Wrapper around set_clear_internal(): runs the helper on `so` and then
 * returns None.  Any value the helper returns is discarded here.
 */
static PyObject *
set_clear(PySetObject *so)
{
set_clear_internal(so);
Py_RETURN_NONE;
}
/* Docstring text bound to the name clear_doc. */
PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
static PyObject * static PyObject *
set_add(PySetObject *so, PyObject *key) set_add(PySetObject *so, PyObject *key)
{ {
if (set_add_internal(so, key) == -1) if (set_add_key(so, key) == -1)
return NULL; return NULL;
Py_RETURN_NONE; Py_RETURN_NONE;
} }
...@@ -1503,7 +1567,7 @@ set_remove(PySetObject *so, PyObject *key) ...@@ -1503,7 +1567,7 @@ set_remove(PySetObject *so, PyObject *key)
return result; return result;
} }
rv = set_discard_internal(so, key); rv = set_discard_key(so, key);
if (rv == -1) if (rv == -1)
return NULL; return NULL;
else if (rv == DISCARD_NOTFOUND) { else if (rv == DISCARD_NOTFOUND) {
...@@ -1534,7 +1598,7 @@ set_discard(PySetObject *so, PyObject *key) ...@@ -1534,7 +1598,7 @@ set_discard(PySetObject *so, PyObject *key)
return result; return result;
} }
if (set_discard_internal(so, key) == -1) if (set_discard_key(so, key) == -1)
return NULL; return NULL;
Py_RETURN_NONE; Py_RETURN_NONE;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment