Commit 9b10f7e0 authored by Tim Peters

Convert relevant dict internals to Py_ssize_t.

I don't have a box with nearly enough RAM, or an OS,
that could get close to tickling this, though (requires
a dict w/ at least 2**31 entries).
parent 1e44ca94
...@@ -8,7 +8,7 @@ extern "C" { ...@@ -8,7 +8,7 @@ extern "C" {
/* Dictionary object type -- mapping from hashable object to object */ /* Dictionary object type -- mapping from hashable object to object */
/* The distribution includes a separate file, Objects/dictnotes.txt, /* The distribution includes a separate file, Objects/dictnotes.txt,
describing explorations into dictionary design and optimization. describing explorations into dictionary design and optimization.
It covers typical dictionary use patterns, the parameters for It covers typical dictionary use patterns, the parameters for
tuning dictionaries, and several ideas for possible optimizations. tuning dictionaries, and several ideas for possible optimizations.
*/ */
...@@ -48,7 +48,11 @@ meaning otherwise. ...@@ -48,7 +48,11 @@ meaning otherwise.
#define PyDict_MINSIZE 8 #define PyDict_MINSIZE 8
typedef struct { typedef struct {
long me_hash; /* cached hash code of me_key */ /* Cached hash code of me_key. Note that hash codes are C longs.
* We have to use Py_ssize_t instead because dict_popitem() abuses
* me_hash to hold a search finger.
*/
Py_ssize_t me_hash;
PyObject *me_key; PyObject *me_key;
PyObject *me_value; PyObject *me_value;
} PyDictEntry; } PyDictEntry;
...@@ -65,14 +69,14 @@ it's two-thirds full. ...@@ -65,14 +69,14 @@ it's two-thirds full.
typedef struct _dictobject PyDictObject; typedef struct _dictobject PyDictObject;
struct _dictobject { struct _dictobject {
PyObject_HEAD PyObject_HEAD
int ma_fill; /* # Active + # Dummy */ Py_ssize_t ma_fill; /* # Active + # Dummy */
int ma_used; /* # Active */ Py_ssize_t ma_used; /* # Active */
/* The table contains ma_mask + 1 slots, and that's a power of 2. /* The table contains ma_mask + 1 slots, and that's a power of 2.
* We store the mask instead of the size because the mask is more * We store the mask instead of the size because the mask is more
* frequently needed. * frequently needed.
*/ */
int ma_mask; Py_ssize_t ma_mask;
/* ma_table points to ma_smalltable for small tables, else to /* ma_table points to ma_smalltable for small tables, else to
* additional malloc'ed memory. ma_table is never NULL! This rule * additional malloc'ed memory. ma_table is never NULL! This rule
......
...@@ -110,6 +110,16 @@ above, and then shifting perturb can be done while the table index is being ...@@ -110,6 +110,16 @@ above, and then shifting perturb can be done while the table index is being
masked); and the dictobject struct required a member to hold the table's masked); and the dictobject struct required a member to hold the table's
polynomial. In Tim's experiments the current scheme ran faster, produced polynomial. In Tim's experiments the current scheme ran faster, produced
equally good collision statistics, needed less code & used less memory. equally good collision statistics, needed less code & used less memory.
Theoretical Python 2.5 headache: hash codes are only C "long", but
sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
dict is genuinely huge, then only the slots directly reachable via indexing
by a C long can be the first slot in a probe sequence. The probe sequence
will still eventually reach every slot in the table, but the collision rate
on initial probes may be much higher than this scheme was designed for.
Getting a hash code as fat as Py_ssize_t is the only real cure. But in
practice, this probably won't make a lick of difference for many years (at
which point everyone will have terabytes of RAM on 64-bit boxes).
*/ */
/* Object used as dummy key to fill deleted entries */ /* Object used as dummy key to fill deleted entries */
...@@ -228,7 +238,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash) ...@@ -228,7 +238,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
register Py_ssize_t i; register Py_ssize_t i;
register size_t perturb; register size_t perturb;
register dictentry *freeslot; register dictentry *freeslot;
register unsigned int mask = mp->ma_mask; register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table; dictentry *ep0 = mp->ma_table;
register dictentry *ep; register dictentry *ep;
register int restore_error; register int restore_error;
...@@ -339,7 +349,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash) ...@@ -339,7 +349,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
register Py_ssize_t i; register Py_ssize_t i;
register size_t perturb; register size_t perturb;
register dictentry *freeslot; register dictentry *freeslot;
register unsigned int mask = mp->ma_mask; register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table; dictentry *ep0 = mp->ma_table;
register dictentry *ep; register dictentry *ep;
...@@ -413,7 +423,7 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) ...@@ -413,7 +423,7 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
Py_DECREF(dummy); Py_DECREF(dummy);
} }
ep->me_key = key; ep->me_key = key;
ep->me_hash = hash; ep->me_hash = (Py_ssize_t)hash;
ep->me_value = value; ep->me_value = value;
mp->ma_used++; mp->ma_used++;
} }
...@@ -425,11 +435,11 @@ items again. When entries have been deleted, the new table may ...@@ -425,11 +435,11 @@ items again. When entries have been deleted, the new table may
actually be smaller than the old one. actually be smaller than the old one.
*/ */
static int static int
dictresize(dictobject *mp, int minused) dictresize(dictobject *mp, Py_ssize_t minused)
{ {
int newsize; Py_ssize_t newsize;
dictentry *oldtable, *newtable, *ep; dictentry *oldtable, *newtable, *ep;
int i; Py_ssize_t i;
int is_oldtable_malloced; int is_oldtable_malloced;
dictentry small_copy[PyDict_MINSIZE]; dictentry small_copy[PyDict_MINSIZE];
...@@ -537,7 +547,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) ...@@ -537,7 +547,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{ {
register dictobject *mp; register dictobject *mp;
register long hash; register long hash;
register int n_used; register Py_ssize_t n_used;
if (!PyDict_Check(op)) { if (!PyDict_Check(op)) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
...@@ -568,14 +578,14 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) ...@@ -568,14 +578,14 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
* Quadrupling the size improves average dictionary sparseness * Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration * (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves * speed (which loops over every possible entry). It also halves
* the number of expensive resize operations in a growing dictionary. | * the number of expensive resize operations in a growing dictionary.
* *
* Very large dictionaries (over 50K items) use doubling instead. * Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints. * This may help applications with severe memory constraints.
*/ */
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0; return 0;
return dictresize(mp, (mp->ma_used>50000 ? mp->ma_used*2 : mp->ma_used*4)); return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
} }
int int
...@@ -619,10 +629,10 @@ PyDict_Clear(PyObject *op) ...@@ -619,10 +629,10 @@ PyDict_Clear(PyObject *op)
dictobject *mp; dictobject *mp;
dictentry *ep, *table; dictentry *ep, *table;
int table_is_malloced; int table_is_malloced;
int fill; Py_ssize_t fill;
dictentry small_copy[PyDict_MINSIZE]; dictentry small_copy[PyDict_MINSIZE];
#ifdef Py_DEBUG #ifdef Py_DEBUG
int i, n; Py_ssize_t i, n;
#endif #endif
if (!PyDict_Check(op)) if (!PyDict_Check(op))
...@@ -685,7 +695,7 @@ PyDict_Clear(PyObject *op) ...@@ -685,7 +695,7 @@ PyDict_Clear(PyObject *op)
/* /*
* Iterate over a dict. Use like so: * Iterate over a dict. Use like so:
* *
* int i; * Py_ssize_t i;
* PyObject *key, *value; * PyObject *key, *value;
* i = 0; # important! i should not otherwise be changed by you * i = 0; # important! i should not otherwise be changed by you
* while (PyDict_Next(yourdict, &i, &key, &value)) { * while (PyDict_Next(yourdict, &i, &key, &value)) {
...@@ -701,7 +711,7 @@ int ...@@ -701,7 +711,7 @@ int
PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
{ {
register Py_ssize_t i; register Py_ssize_t i;
register int mask; register Py_ssize_t mask;
register dictentry *ep; register dictentry *ep;
if (!PyDict_Check(op)) if (!PyDict_Check(op))
...@@ -729,7 +739,7 @@ static void ...@@ -729,7 +739,7 @@ static void
dict_dealloc(register dictobject *mp) dict_dealloc(register dictobject *mp)
{ {
register dictentry *ep; register dictentry *ep;
int fill = mp->ma_fill; Py_ssize_t fill = mp->ma_fill;
PyObject_GC_UnTrack(mp); PyObject_GC_UnTrack(mp);
Py_TRASHCAN_SAFE_BEGIN(mp) Py_TRASHCAN_SAFE_BEGIN(mp)
for (ep = mp->ma_table; fill > 0; ep++) { for (ep = mp->ma_table; fill > 0; ep++) {
...@@ -751,10 +761,10 @@ dict_dealloc(register dictobject *mp) ...@@ -751,10 +761,10 @@ dict_dealloc(register dictobject *mp)
static int static int
dict_print(register dictobject *mp, register FILE *fp, register int flags) dict_print(register dictobject *mp, register FILE *fp, register int flags)
{ {
register int i; register Py_ssize_t i;
register int any; register Py_ssize_t any;
i = Py_ReprEnter((PyObject*)mp); i = (int)Py_ReprEnter((PyObject*)mp);
if (i != 0) { if (i != 0) {
if (i < 0) if (i < 0)
return i; return i;
...@@ -896,7 +906,7 @@ dict_subscript(dictobject *mp, register PyObject *key) ...@@ -896,7 +906,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
PyObject *missing; PyObject *missing;
static PyObject *missing_str = NULL; static PyObject *missing_str = NULL;
if (missing_str == NULL) if (missing_str == NULL)
missing_str = missing_str =
PyString_InternFromString("__missing__"); PyString_InternFromString("__missing__");
missing = _PyType_Lookup(mp->ob_type, missing_str); missing = _PyType_Lookup(mp->ob_type, missing_str);
if (missing != NULL) if (missing != NULL)
...@@ -930,9 +940,9 @@ static PyObject * ...@@ -930,9 +940,9 @@ static PyObject *
dict_keys(register dictobject *mp) dict_keys(register dictobject *mp)
{ {
register PyObject *v; register PyObject *v;
register int i, j; register Py_ssize_t i, j;
dictentry *ep; dictentry *ep;
int mask, n; Py_ssize_t mask, n;
again: again:
n = mp->ma_used; n = mp->ma_used;
...@@ -964,9 +974,9 @@ static PyObject * ...@@ -964,9 +974,9 @@ static PyObject *
dict_values(register dictobject *mp) dict_values(register dictobject *mp)
{ {
register PyObject *v; register PyObject *v;
register int i, j; register Py_ssize_t i, j;
dictentry *ep; dictentry *ep;
int mask, n; Py_ssize_t mask, n;
again: again:
n = mp->ma_used; n = mp->ma_used;
...@@ -998,8 +1008,8 @@ static PyObject * ...@@ -998,8 +1008,8 @@ static PyObject *
dict_items(register dictobject *mp) dict_items(register dictobject *mp)
{ {
register PyObject *v; register PyObject *v;
register int i, j, n; register Py_ssize_t i, j, n;
int mask; Py_ssize_t mask;
PyObject *item, *key, *value; PyObject *item, *key, *value;
dictentry *ep; dictentry *ep;
...@@ -1132,7 +1142,7 @@ int ...@@ -1132,7 +1142,7 @@ int
PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
{ {
PyObject *it; /* iter(seq2) */ PyObject *it; /* iter(seq2) */
int i; /* index into seq2 of current element */ Py_ssize_t i; /* index into seq2 of current element */
PyObject *item; /* seq2[i] */ PyObject *item; /* seq2[i] */
PyObject *fast; /* item as a 2-tuple or 2-list */ PyObject *fast; /* item as a 2-tuple or 2-list */
...@@ -1195,7 +1205,7 @@ Fail: ...@@ -1195,7 +1205,7 @@ Fail:
i = -1; i = -1;
Return: Return:
Py_DECREF(it); Py_DECREF(it);
return i; return (int)i;
} }
int int
...@@ -1208,7 +1218,7 @@ int ...@@ -1208,7 +1218,7 @@ int
PyDict_Merge(PyObject *a, PyObject *b, int override) PyDict_Merge(PyObject *a, PyObject *b, int override)
{ {
register PyDictObject *mp, *other; register PyDictObject *mp, *other;
register int i; register Py_ssize_t i;
dictentry *entry; dictentry *entry;
/* We accept for the argument either a concrete dictionary object, /* We accept for the argument either a concrete dictionary object,
...@@ -1247,7 +1257,8 @@ PyDict_Merge(PyObject *a, PyObject *b, int override) ...@@ -1247,7 +1257,8 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
PyDict_GetItem(a, entry->me_key) == NULL)) { PyDict_GetItem(a, entry->me_key) == NULL)) {
Py_INCREF(entry->me_key); Py_INCREF(entry->me_key);
Py_INCREF(entry->me_value); Py_INCREF(entry->me_value);
insertdict(mp, entry->me_key, entry->me_hash, insertdict(mp, entry->me_key,
(long)entry->me_hash,
entry->me_value); entry->me_value);
} }
} }
...@@ -1376,7 +1387,8 @@ characterize(dictobject *a, dictobject *b, PyObject **pval) ...@@ -1376,7 +1387,8 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
{ {
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */ PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
PyObject *aval = NULL; /* a[akey] */ PyObject *aval = NULL; /* a[akey] */
int i, cmp; Py_ssize_t i;
int cmp;
for (i = 0; i <= a->ma_mask; i++) { for (i = 0; i <= a->ma_mask; i++) {
PyObject *thiskey, *thisaval, *thisbval; PyObject *thiskey, *thisaval, *thisbval;
...@@ -1399,7 +1411,7 @@ characterize(dictobject *a, dictobject *b, PyObject **pval) ...@@ -1399,7 +1411,7 @@ characterize(dictobject *a, dictobject *b, PyObject **pval)
* find its associated value anymore; or * find its associated value anymore; or
* maybe it is but the compare deleted the * maybe it is but the compare deleted the
* a[thiskey] entry. * a[thiskey] entry.
*/ | */
Py_DECREF(thiskey); Py_DECREF(thiskey);
continue; continue;
} }
...@@ -1499,7 +1511,7 @@ Finished: ...@@ -1499,7 +1511,7 @@ Finished:
static int static int
dict_equal(dictobject *a, dictobject *b) dict_equal(dictobject *a, dictobject *b)
{ {
int i; Py_ssize_t i;
if (a->ma_used != b->ma_used) if (a->ma_used != b->ma_used)
/* can't be equal if # of entries differ */ /* can't be equal if # of entries differ */
...@@ -1673,7 +1685,7 @@ dict_pop(dictobject *mp, PyObject *args) ...@@ -1673,7 +1685,7 @@ dict_pop(dictobject *mp, PyObject *args)
static PyObject * static PyObject *
dict_popitem(dictobject *mp) dict_popitem(dictobject *mp)
{ {
int i = 0; Py_ssize_t i = 0;
dictentry *ep; dictentry *ep;
PyObject *res; PyObject *res;
...@@ -1683,7 +1695,7 @@ dict_popitem(dictobject *mp) ...@@ -1683,7 +1695,7 @@ dict_popitem(dictobject *mp)
* happened, the result would be an infinite loop (searching for an * happened, the result would be an infinite loop (searching for an
* entry that no longer exists). Note that the usual popitem() * entry that no longer exists). Note that the usual popitem()
* idiom is "while d: k, v = d.popitem()". so needing to throw the * idiom is "while d: k, v = d.popitem()". so needing to throw the
* tuple away if the dict *is* empty isn't a significant * tuple away if the dict *is* empty isn't a significant
* inefficiency -- possible, but unlikely in practice. * inefficiency -- possible, but unlikely in practice.
*/ */
res = PyTuple_New(2); res = PyTuple_New(2);
...@@ -1699,11 +1711,11 @@ dict_popitem(dictobject *mp) ...@@ -1699,11 +1711,11 @@ dict_popitem(dictobject *mp)
* field of slot 0 to hold a search finger: * field of slot 0 to hold a search finger:
* If slot 0 has a value, use slot 0. * If slot 0 has a value, use slot 0.
* Else slot 0 is being used to hold a search finger, * Else slot 0 is being used to hold a search finger,
* and we use its hash value as the first index to look. | * and we use its hash value as the first index to look.
*/ */
ep = &mp->ma_table[0]; ep = &mp->ma_table[0];
if (ep->me_value == NULL) { if (ep->me_value == NULL) {
i = (int)ep->me_hash; i = ep->me_hash;
/* The hash field may be a real hash value, or it may be a /* The hash field may be a real hash value, or it may be a
* legit search finger, or it may be a once-legit search * legit search finger, or it may be a once-legit search
* finger that's out of bounds now because it wrapped around * finger that's out of bounds now because it wrapped around
...@@ -2035,10 +2047,10 @@ PyDict_DelItemString(PyObject *v, const char *key) ...@@ -2035,10 +2047,10 @@ PyDict_DelItemString(PyObject *v, const char *key)
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
dictobject *di_dict; /* Set to NULL when iterator is exhausted */ dictobject *di_dict; /* Set to NULL when iterator is exhausted */
int di_used; Py_ssize_t di_used;
int di_pos; Py_ssize_t di_pos;
PyObject* di_result; /* reusable result tuple for iteritems */ PyObject* di_result; /* reusable result tuple for iteritems */
long len; Py_ssize_t len;
} dictiterobject; } dictiterobject;
static PyObject * static PyObject *
...@@ -2076,10 +2088,10 @@ dictiter_dealloc(dictiterobject *di) ...@@ -2076,10 +2088,10 @@ dictiter_dealloc(dictiterobject *di)
static PyObject * static PyObject *
dictiter_len(dictiterobject *di) dictiter_len(dictiterobject *di)
{ {
long len = 0; Py_ssize_t len = 0;
if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used) if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
len = di->len; len = di->len;
return PyInt_FromLong(len); return PyInt_FromSize_t(len);
} }
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
...@@ -2092,7 +2104,7 @@ static PyMethodDef dictiter_methods[] = { ...@@ -2092,7 +2104,7 @@ static PyMethodDef dictiter_methods[] = {
static PyObject *dictiter_iternextkey(dictiterobject *di) static PyObject *dictiter_iternextkey(dictiterobject *di)
{ {
PyObject *key; PyObject *key;
register int i, mask; register Py_ssize_t i, mask;
register dictentry *ep; register dictentry *ep;
dictobject *d = di->di_dict; dictobject *d = di->di_dict;
...@@ -2165,7 +2177,7 @@ PyTypeObject PyDictIterKey_Type = { ...@@ -2165,7 +2177,7 @@ PyTypeObject PyDictIterKey_Type = {
static PyObject *dictiter_iternextvalue(dictiterobject *di) static PyObject *dictiter_iternextvalue(dictiterobject *di)
{ {
PyObject *value; PyObject *value;
register int i, mask; register Py_ssize_t i, mask;
register dictentry *ep; register dictentry *ep;
dictobject *d = di->di_dict; dictobject *d = di->di_dict;
...@@ -2238,7 +2250,7 @@ PyTypeObject PyDictIterValue_Type = { ...@@ -2238,7 +2250,7 @@ PyTypeObject PyDictIterValue_Type = {
static PyObject *dictiter_iternextitem(dictiterobject *di) static PyObject *dictiter_iternextitem(dictiterobject *di)
{ {
PyObject *key, *value, *result = di->di_result; PyObject *key, *value, *result = di->di_result;
register int i, mask; register Py_ssize_t i, mask;
register dictentry *ep; register dictentry *ep;
dictobject *d = di->di_dict; dictobject *d = di->di_dict;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment