Commit 0ab085c4 authored by Tim Peters

Changed the dict implementation to take "string shortcuts" only when
keys are true strings -- no subclasses need apply.  This may be debatable.

The problem is that a str subclass may very well want to override __eq__
and/or __hash__ (see the new example of case-insensitive strings in
test_descr), but go-fast shortcuts for strings are ubiquitous in our dicts
(and subclass overrides aren't even looked for then).  Another go-fast
reason for the change is that PyString_CheckExact() is a quicker test
than PyString_Check(), and we make such a test on virtually every access
to every dict.

OTOH, a str subclass may also be perfectly happy using the base str eq
and hash, and this change slows them a lot.  But those cases are still
hypothetical, while Python's own reliance on true-string dicts is not.
parent 742dfd6f
...@@ -1533,8 +1533,8 @@ def inherits(): ...@@ -1533,8 +1533,8 @@ def inherits():
verify(str(s) == base) verify(str(s) == base)
verify(str(s).__class__ is str) verify(str(s).__class__ is str)
verify(hash(s) == hash(base)) verify(hash(s) == hash(base))
verify({s: 1}[base] == 1) #XXX verify({s: 1}[base] == 1)
verify({base: 1}[s] == 1) #XXX verify({base: 1}[s] == 1)
verify((s + "").__class__ is str) verify((s + "").__class__ is str)
verify(s + "" == base) verify(s + "" == base)
verify(("" + s).__class__ is str) verify(("" + s).__class__ is str)
...@@ -1758,6 +1758,39 @@ f = t(%r, 'w') # rexec can't catch this by itself ...@@ -1758,6 +1758,39 @@ f = t(%r, 'w') # rexec can't catch this by itself
except: except:
pass pass
def str_subclass_as_dict_key():
    """Exercise a str subclass that overrides __eq__ and __hash__ as a dict key.

    Builds a dict keyed by case-insensitive strings and checks that
    subscripting, ``in`` and ``dict.get`` all honour the overrides.
    """
    if verbose:
        print("Testing a str subclass used as dict key ..")

    class cistr(str):
        """Subclass of str that computes __eq__ case-insensitively.

        Also computes a hash code of the string in canonical form.
        """

        def __init__(self, value):
            # The lower-cased text is the canonical form; the hash is
            # computed from it once and cached on the instance.
            lowered = value.lower()
            self.canonical = lowered
            self.hashcode = hash(lowered)

        def __eq__(self, other):
            # Wrap a non-cistr operand so both sides expose .canonical.
            rhs = other if isinstance(other, cistr) else cistr(other)
            return self.canonical == rhs.canonical

        def __hash__(self):
            return self.hashcode

    # Equality is case-insensitive in both directions, while str() of
    # an instance keeps the original spelling.
    verify('aBc' == cistr('ABC') == 'abc')
    verify(str(cistr('ABC')) == 'ABC')

    table = {
        cistr('one'): 1,
        cistr('two'): 2,
        cistr('tHree'): 3,
    }

    # Lookups with differently-cased keys must find the same entries.
    verify(table[cistr('one')] == 1)
    verify(table[cistr('tWo')] == 2)
    verify(table[cistr('THrEE')] == 3)
    verify(cistr('ONe') in table)
    verify(table.get(cistr('thrEE')) == 3)
def all(): def all():
lists() lists()
dicts() dicts()
...@@ -1794,6 +1827,7 @@ def all(): ...@@ -1794,6 +1827,7 @@ def all():
inherits() inherits()
keywords() keywords()
restricted() restricted()
str_subclass_as_dict_key()
all() all()
......
...@@ -298,8 +298,8 @@ Done: ...@@ -298,8 +298,8 @@ Done:
* means we don't need to go through PyObject_Compare(); we can always use * means we don't need to go through PyObject_Compare(); we can always use
* _PyString_Eq directly. * _PyString_Eq directly.
* *
* This really only becomes meaningful if proper error handling in lookdict() * This is valuable because the general-case error handling in lookdict() is
* is too expensive. * expensive, and dicts with pure-string keys are very common.
*/ */
static dictentry * static dictentry *
lookdict_string(dictobject *mp, PyObject *key, register long hash) lookdict_string(dictobject *mp, PyObject *key, register long hash)
...@@ -311,8 +311,11 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash) ...@@ -311,8 +311,11 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
dictentry *ep0 = mp->ma_table; dictentry *ep0 = mp->ma_table;
register dictentry *ep; register dictentry *ep;
/* make sure this function doesn't have to handle non-string keys */ /* Make sure this function doesn't have to handle non-string keys,
if (!PyString_Check(key)) { including subclasses of str; e.g., one reason to subclass
strings is to override __eq__, and for speed we don't cater to
that here. */
if (!PyString_CheckExact(key)) {
#ifdef SHOW_CONVERSION_COUNTS #ifdef SHOW_CONVERSION_COUNTS
++converted; ++converted;
#endif #endif
...@@ -478,7 +481,7 @@ PyDict_GetItem(PyObject *op, PyObject *key) ...@@ -478,7 +481,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return NULL; return NULL;
} }
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -510,7 +513,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) ...@@ -510,7 +513,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
} }
mp = (dictobject *)op; mp = (dictobject *)op;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (PyString_Check(key)) { if (PyString_CheckExact(key)) {
#ifdef INTERN_STRINGS #ifdef INTERN_STRINGS
if (((PyStringObject *)key)->ob_sinterned != NULL) { if (((PyStringObject *)key)->ob_sinterned != NULL) {
key = ((PyStringObject *)key)->ob_sinterned; key = ((PyStringObject *)key)->ob_sinterned;
...@@ -562,7 +565,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) ...@@ -562,7 +565,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
return -1; return -1;
} }
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -820,7 +823,7 @@ dict_repr(dictobject *mp) ...@@ -820,7 +823,7 @@ dict_repr(dictobject *mp)
if (s == NULL) if (s == NULL)
goto Done; goto Done;
result = _PyString_Join(s, pieces); result = _PyString_Join(s, pieces);
Py_DECREF(s); Py_DECREF(s);
Done: Done:
Py_XDECREF(pieces); Py_XDECREF(pieces);
...@@ -842,7 +845,7 @@ dict_subscript(dictobject *mp, register PyObject *key) ...@@ -842,7 +845,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
long hash; long hash;
assert(mp->ma_table != NULL); assert(mp->ma_table != NULL);
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -1358,7 +1361,7 @@ dict_has_key(register dictobject *mp, PyObject *key) ...@@ -1358,7 +1361,7 @@ dict_has_key(register dictobject *mp, PyObject *key)
long hash; long hash;
register long ok; register long ok;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -1382,7 +1385,7 @@ dict_get(register dictobject *mp, PyObject *args) ...@@ -1382,7 +1385,7 @@ dict_get(register dictobject *mp, PyObject *args)
return NULL; return NULL;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -1411,7 +1414,7 @@ dict_setdefault(register dictobject *mp, PyObject *args) ...@@ -1411,7 +1414,7 @@ dict_setdefault(register dictobject *mp, PyObject *args)
return NULL; return NULL;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
...@@ -1647,7 +1650,7 @@ dict_contains(dictobject *mp, PyObject *key) ...@@ -1647,7 +1650,7 @@ dict_contains(dictobject *mp, PyObject *key)
long hash; long hash;
#ifdef CACHE_HASH #ifdef CACHE_HASH
if (!PyString_Check(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif #endif
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment