Commit bedadcce authored by Marius Wachtler

string: store hash inside object + microoptimizations

parent 00b022aa
...@@ -120,45 +120,6 @@ static inline Box* callattrInternal3(Box* obj, BoxedString* attr, LookupScope sc ...@@ -120,45 +120,6 @@ static inline Box* callattrInternal3(Box* obj, BoxedString* attr, LookupScope sc
return callattrInternal<S, rewritable>(obj, attr, scope, rewrite_args, argspec, arg1, arg2, arg3, NULL, NULL); return callattrInternal<S, rewritable>(obj, attr, scope, rewrite_args, argspec, arg1, arg2, arg3, NULL, NULL);
} }
// Stat counters backing the timers in PyHasher, PyEq and PyLt below.
// They are only defined when STAT_TIMERS is set.
// NOTE(review): the uses below are guarded by EXPENSIVE_STAT_TIMERS rather than
// STAT_TIMERS -- presumably EXPENSIVE_STAT_TIMERS implies STAT_TIMERS; confirm.
#if STAT_TIMERS
static uint64_t* pyhasher_timer_counter = Stats::getStatCounter("us_timer_PyHasher");
static uint64_t* pyeq_timer_counter = Stats::getStatCounter("us_timer_PyEq");
static uint64_t* pylt_timer_counter = Stats::getStatCounter("us_timer_PyLt");
#endif
// Hash functor for boxed objects.
// Objects whose class is exactly str_cls are hashed with strHashUnboxed();
// every other object is hashed with hashUnboxed().
size_t PyHasher::operator()(Box* b) const {
#if EXPENSIVE_STAT_TIMERS
ScopedStatTimer _st(pyhasher_timer_counter, 10);
#endif
// The class check is a pointer comparison against str_cls, so only objects
// with that exact class take the string path.
if (b->cls == str_cls) {
auto s = static_cast<BoxedString*>(b);
return strHashUnboxed(s);
}
return hashUnboxed(b);
}
// Equality functor for boxed objects.
// Delegates to PyObject_RichCompareBool(lhs, rhs, Py_EQ) and converts the
// result to bool.
bool PyEq::operator()(Box* lhs, Box* rhs) const {
#if EXPENSIVE_STAT_TIMERS
ScopedStatTimer _st(pyeq_timer_counter, 10);
#endif
int r = PyObject_RichCompareBool(lhs, rhs, Py_EQ);
// -1 signals that the comparison failed; throwCAPIException() is called
// instead of returning a value.
if (r == -1)
throwCAPIException();
return (bool)r;
}
// Less-than functor for boxed objects.
// Delegates to PyObject_RichCompareBool(lhs, rhs, Py_LT) and converts the
// result to bool.
bool PyLt::operator()(Box* lhs, Box* rhs) const {
#if EXPENSIVE_STAT_TIMERS
ScopedStatTimer _st(pylt_timer_counter, 10);
#endif
int r = PyObject_RichCompareBool(lhs, rhs, Py_LT);
// -1 signals that the comparison failed; throwCAPIException() is called
// instead of returning a value.
if (r == -1)
throwCAPIException();
return (bool)r;
}
extern "C" Box* deopt(AST_expr* expr, Box* value) { extern "C" Box* deopt(AST_expr* expr, Box* value) {
STAT_TIMER(t0, "us_timer_deopt", 10); STAT_TIMER(t0, "us_timer_deopt", 10);
......
...@@ -55,33 +55,33 @@ namespace pyston { ...@@ -55,33 +55,33 @@ namespace pyston {
BoxedString* EmptyString; BoxedString* EmptyString;
BoxedString* characters[UCHAR_MAX + 1]; BoxedString* characters[UCHAR_MAX + 1];
BoxedString::BoxedString(const char* s, size_t n) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(const char* s, size_t n) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
assert(s); assert(s);
RELEASE_ASSERT(n != llvm::StringRef::npos, ""); RELEASE_ASSERT(n != llvm::StringRef::npos, "");
memmove(data(), s, n); memcpy(data(), s, n);
data()[n] = 0; data()[n] = 0;
} }
BoxedString::BoxedString(llvm::StringRef lhs, llvm::StringRef rhs) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(llvm::StringRef lhs, llvm::StringRef rhs) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(lhs.size() + rhs.size() != llvm::StringRef::npos, ""); RELEASE_ASSERT(lhs.size() + rhs.size() != llvm::StringRef::npos, "");
memmove(data(), lhs.data(), lhs.size()); memcpy(data(), lhs.data(), lhs.size());
memmove(data() + lhs.size(), rhs.data(), rhs.size()); memcpy(data() + lhs.size(), rhs.data(), rhs.size());
data()[lhs.size() + rhs.size()] = 0; data()[lhs.size() + rhs.size()] = 0;
} }
BoxedString::BoxedString(llvm::StringRef s) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(llvm::StringRef s) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(s.size() != llvm::StringRef::npos, ""); RELEASE_ASSERT(s.size() != llvm::StringRef::npos, "");
memmove(data(), s.data(), s.size()); memcpy(data(), s.data(), s.size());
data()[s.size()] = 0; data()[s.size()] = 0;
} }
BoxedString::BoxedString(size_t n, char c) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(size_t n, char c) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(n != llvm::StringRef::npos, ""); RELEASE_ASSERT(n != llvm::StringRef::npos, "");
memset(data(), c, n); memset(data(), c, n);
data()[n] = 0; data()[n] = 0;
} }
BoxedString::BoxedString(size_t n) : interned_state(SSTATE_NOT_INTERNED) { BoxedString::BoxedString(size_t n) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(n != llvm::StringRef::npos, ""); RELEASE_ASSERT(n != llvm::StringRef::npos, "");
// Note: no memset. add the null-terminator for good measure though // Note: no memset. add the null-terminator for good measure though
// (CPython does the same thing). // (CPython does the same thing).
...@@ -1586,13 +1586,15 @@ extern "C" size_t strHashUnboxed(BoxedString* self) { ...@@ -1586,13 +1586,15 @@ extern "C" size_t strHashUnboxed(BoxedString* self) {
#ifdef Py_DEBUG #ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized); assert(_Py_HashSecret_Initialized);
#endif #endif
if (self->hash != -1)
return self->hash;
long len = Py_SIZE(self); long len = Py_SIZE(self);
/* /*
We make the hash of the empty string be 0, rather than using We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret (prefix ^ suffix), since this slightly obfuscates the hash secret
*/ */
if (len == 0) { if (len == 0) {
self->hash = 0;
return 0; return 0;
} }
p = self->s().data(); p = self->s().data();
...@@ -1604,7 +1606,7 @@ extern "C" size_t strHashUnboxed(BoxedString* self) { ...@@ -1604,7 +1606,7 @@ extern "C" size_t strHashUnboxed(BoxedString* self) {
x ^= _Py_HashSecret.suffix; x ^= _Py_HashSecret.suffix;
if (x == -1) if (x == -1)
x = -2; x = -2;
self->hash = x;
return x; return x;
} }
...@@ -1662,6 +1664,11 @@ Box* _strSlice(BoxedString* self, i64 start, i64 stop, i64 step, i64 length) { ...@@ -1662,6 +1664,11 @@ Box* _strSlice(BoxedString* self, i64 start, i64 stop, i64 step, i64 length) {
if (length == 0) if (length == 0)
return EmptyString; return EmptyString;
if (length == 1) {
char c = self->s()[start];
return characters[c & UCHAR_MAX];
}
BoxedString* bs = BoxedString::createUninitializedString(length); BoxedString* bs = BoxedString::createUninitializedString(length);
copySlice(bs->data(), s.data(), start, step, length); copySlice(bs->data(), s.data(), start, step, length);
return bs; return bs;
...@@ -2531,19 +2538,18 @@ extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept { ...@@ -2531,19 +2538,18 @@ extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
if (newsize < s->size()) { if (newsize < s->size()) {
// XXX resize the box (by reallocating) smaller if it makes sense // XXX resize the box (by reallocating) smaller if it makes sense
s->ob_size = newsize; s->ob_size = newsize;
s->hash = -1; /* invalidate cached hash value */
s->data()[newsize] = 0; s->data()[newsize] = 0;
return 0; return 0;
} }
BoxedString* resized; BoxedString* resized;
if (s->cls == str_cls) if (s->cls == str_cls)
resized = new (newsize) BoxedString(newsize, 0); // we need an uninitialized string, but this will memset resized = BoxedString::createUninitializedString(newsize);
else else
resized = new (s->cls, newsize) resized = BoxedString::createUninitializedString(s->cls, newsize);
BoxedString(newsize, 0); // we need an uninitialized string, but this will memset memcpy(resized->data(), s->data(), s->size());
memmove(resized->data(), s->data(), s->size()); resized->data()[newsize] = 0;
*pv = resized; *pv = resized;
return 0; return 0;
} }
......
...@@ -175,6 +175,12 @@ extern "C" void printFloat(double d); ...@@ -175,6 +175,12 @@ extern "C" void printFloat(double d);
Box* objectStr(Box*); Box* objectStr(Box*);
Box* objectRepr(Box*); Box* objectRepr(Box*);
void checkAndThrowCAPIException();
void throwCAPIException() __attribute__((noreturn));
void ensureCAPIExceptionSet();
struct ExcInfo;
void setCAPIException(const ExcInfo& e);
// In Pyston, this is the same type as CPython's PyTypeObject (they are interchangeable, but we // In Pyston, this is the same type as CPython's PyTypeObject (they are interchangeable, but we
// use BoxedClass in Pyston wherever possible as a convention). // use BoxedClass in Pyston wherever possible as a convention).
class BoxedClass : public BoxVar { class BoxedClass : public BoxVar {
...@@ -387,6 +393,7 @@ public: ...@@ -387,6 +393,7 @@ public:
// optimizations and inlining, creating a new one each time shouldn't have any cost. // optimizations and inlining, creating a new one each time shouldn't have any cost.
llvm::StringRef s() const { return llvm::StringRef(s_data, ob_size); }; llvm::StringRef s() const { return llvm::StringRef(s_data, ob_size); };
long hash; // -1 means not yet computed
char interned_state; char interned_state;
char* data() { return s_data; } char* data() { return s_data; }
...@@ -430,6 +437,7 @@ public: ...@@ -430,6 +437,7 @@ public:
// creates an uninitialized string of length n; useful for directly constructing into the string and avoiding // creates an uninitialized string of length n; useful for directly constructing into the string and avoiding
// copies: // copies:
static BoxedString* createUninitializedString(ssize_t n) { return new (n) BoxedString(n); } static BoxedString* createUninitializedString(ssize_t n) { return new (n) BoxedString(n); }
static BoxedString* createUninitializedString(BoxedClass* cls, ssize_t n) { return new (cls, n) BoxedString(n); }
// Gets a writeable pointer to the contents of a string. // Gets a writeable pointer to the contents of a string.
// Is only meant to be used with something just created from createUninitializedString(), though // Is only meant to be used with something just created from createUninitializedString(), though
...@@ -447,6 +455,7 @@ private: ...@@ -447,6 +455,7 @@ private:
}; };
extern "C" size_t strHashUnboxed(BoxedString* self); extern "C" size_t strHashUnboxed(BoxedString* self);
extern "C" int64_t hashUnboxed(Box* obj);
class BoxedInstanceMethod : public Box { class BoxedInstanceMethod : public Box {
public: public:
...@@ -678,15 +687,33 @@ static_assert(offsetof(BoxedTuple, elts) == offsetof(PyTupleObject, ob_item), "" ...@@ -678,15 +687,33 @@ static_assert(offsetof(BoxedTuple, elts) == offsetof(PyTupleObject, ob_item), ""
extern BoxedString* characters[UCHAR_MAX + 1]; extern BoxedString* characters[UCHAR_MAX + 1];
struct PyHasher { struct PyHasher {
size_t operator()(Box*) const; size_t operator()(Box* b) const {
if (b->cls == str_cls) {
auto s = static_cast<BoxedString*>(b);
if (s->hash != -1)
return s->hash;
return strHashUnboxed(s);
}
return hashUnboxed(b);
}
}; };
struct PyEq { struct PyEq {
bool operator()(Box*, Box*) const; bool operator()(Box* lhs, Box* rhs) const {
int r = PyObject_RichCompareBool(lhs, rhs, Py_EQ);
if (r == -1)
throwCAPIException();
return (bool)r;
}
}; };
struct PyLt { struct PyLt {
bool operator()(Box*, Box*) const; bool operator()(Box* lhs, Box* rhs) const {
int r = PyObject_RichCompareBool(lhs, rhs, Py_LT);
if (r == -1)
throwCAPIException();
return (bool)r;
}
}; };
// llvm::DenseMap doesn't store the original hash values, choosing to instead // llvm::DenseMap doesn't store the original hash values, choosing to instead
...@@ -1077,12 +1104,6 @@ AST* unboxAst(Box* b); ...@@ -1077,12 +1104,6 @@ AST* unboxAst(Box* b);
// Our default for tp_alloc: // Our default for tp_alloc:
extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems) noexcept; extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems) noexcept;
void checkAndThrowCAPIException();
void throwCAPIException() __attribute__((noreturn));
void ensureCAPIExceptionSet();
struct ExcInfo;
void setCAPIException(const ExcInfo& e);
#define fatalOrError(exception, message) \ #define fatalOrError(exception, message) \
do { \ do { \
if (CONTINUE_AFTER_FATAL) \ if (CONTINUE_AFTER_FATAL) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment