Commit 3486e84c authored by Chris Toshok

Merge pull request #427 from toshok/unicode-hash

use the same hash function for both unicode and string objects
parents ed5f850c 15a0ed30
......@@ -6598,14 +6598,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
return (PyObject*) PyUnicode_FromUnicode(&self->str[index], 1);
}
// Pyston change: forward declaration of the unboxed unicode hash that unicode_hash below calls.
extern size_t unicodeHashUnboxed(PyUnicodeObject* obj);
static long
unicode_hash(PyUnicodeObject *self)
{
// Pyston change: just convert to a str and hash, since we use std::hash and not
// CPython's hashing algorithm they duplicated here:
PyObject* str = PyUnicode_AsEncodedString((PyObject*)self, "utf8", "replace");
return str->ob_type->tp_hash(str);
return unicodeHashUnboxed(self);
#if 0
/* Since Unicode objects compare equal to their ASCII string
counterparts, they should use the individual character values
......
......@@ -128,7 +128,7 @@ static Box* (*callattrInternal3)(Box*, const std::string*, LookupScope, CallRewr
size_t PyHasher::operator()(Box* b) const {
if (b->cls == str_cls) {
std::hash<std::string> H;
StringHash<std::string> H;
return H(static_cast<BoxedString*>(b)->s);
}
......
......@@ -1478,10 +1478,20 @@ failed:
return NULL;
}
// Returns the hash of a unicode object as a raw size_t.
//
// If self->hash holds anything other than -1 that value is returned as-is.
// Otherwise the object's Py_UNICODE buffer is hashed with
// pyston::StringHash<Py_UNICODE> over its full length. The freshly computed
// value is returned but not written back to self->hash by this function.
extern "C" size_t unicodeHashUnboxed(PyUnicodeObject* self) {
    if (self->hash == -1) {
        Py_UNICODE* units = PyUnicode_AS_UNICODE(self);
        Py_ssize_t count = PyUnicode_GET_SIZE(self);

        pyston::StringHash<Py_UNICODE> hasher;
        return hasher(units, count);
    }

    return self->hash;
}
// Returns the hash of a str instance as a boxed int.
//
// The value comes from StringHash<std::string>, i.e. the same hash family that
// unicodeHashUnboxed applies to unicode objects via StringHash<Py_UNICODE>.
// Exactly one hasher is declared here; a second declaration under the same
// name would not compile.
extern "C" Box* strHash(BoxedString* self) {
    assert(isSubclass(self->cls, str_cls));

    StringHash<std::string> hasher;
    return boxInt(hasher(self->s));
}
......
......@@ -385,6 +385,37 @@ class BoxedUnicode : public Box {
// TODO implementation
};
// String hash over code units of type T using the "hash * 33 + c" recurrence
// seeded with 5381.
//
// Both overloads apply the same recurrence, so a zero-terminated buffer and a
// (pointer, length) pair holding the same code units produce the same value.
// Each code unit is added after the usual integer conversions to size_t, so a
// negative value of a signed T wraps modulo 2^N instead of being rejected.
//
// The call operators are const so that a const hasher instance can be invoked.
template <typename T> struct StringHash {
    // Hashes the code units of `str` up to, but not including, the first zero
    // unit. `str` must point at a zero-terminated sequence.
    size_t operator()(const T* str) const {
        size_t hash = 5381;
        for (; *str; ++str)
            hash = mix(hash, *str);
        return hash;
    }

    // Hashes exactly `len` code units starting at `str`; zero units inside the
    // range are hashed like any other. A zero or negative `len` yields the seed.
    // `len` is pointer-sized so that size_t / ssize_t lengths are not narrowed
    // to int on their way in.
    size_t operator()(const T* str, ptrdiff_t len) const {
        size_t hash = 5381;
        for (ptrdiff_t i = 0; i < len; ++i)
            hash = mix(hash, str[i]);
        return hash;
    }

private:
    // One step of the recurrence: hash * 33 + c, written as shift-and-add.
    static size_t mix(size_t hash, T c) { return ((hash << 5) + hash) + c; }
};
template <> struct StringHash<std::string> {
size_t operator()(const std::string& str) {
StringHash<char> H;
return H(&str[0], str.size());
}
};
class BoxedInstanceMethod : public Box {
public:
Box** in_weakreflist;
......
......@@ -24,6 +24,7 @@ print u'a' in c.__dict__
# Equality between unicode and str literals, in both operand orders.
print u'' == ''
print '' == u''
# Compare the hash of a unicode literal with the hash of the str literal
# holding the same text (empty and non-empty cases).
print hash(u'') == hash('')
print hash(u'hello world') == hash('hello world')
# Concatenation mixing str and unicode operands, in both orders.
print "Hello " + u" World"
print u"Hello " + " World"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment