Commit bedadcce authored by Marius Wachtler

string: store hash inside object + microoptimizations

parent 00b022aa
......@@ -120,45 +120,6 @@ static inline Box* callattrInternal3(Box* obj, BoxedString* attr, LookupScope sc
return callattrInternal<S, rewritable>(obj, attr, scope, rewrite_args, argspec, arg1, arg2, arg3, NULL, NULL);
}
// Stat counters for the PyHasher / PyEq / PyLt functors, each looked up by name via
// Stats::getStatCounter() and handed to the ScopedStatTimer in the matching operator().
// NOTE(review): these are defined under STAT_TIMERS but referenced under
// EXPENSIVE_STAT_TIMERS — presumably the latter implies the former; confirm.
#if STAT_TIMERS
static uint64_t* pyhasher_timer_counter = Stats::getStatCounter("us_timer_PyHasher");
static uint64_t* pyeq_timer_counter = Stats::getStatCounter("us_timer_PyEq");
static uint64_t* pylt_timer_counter = Stats::getStatCounter("us_timer_PyLt");
#endif
// Hashes a boxed object: objects whose class is exactly str_cls are hashed with
// strHashUnboxed(); every other object goes through hashUnboxed().
size_t PyHasher::operator()(Box* b) const {
#if EXPENSIVE_STAT_TIMERS
    ScopedStatTimer _st(pyhasher_timer_counter, 10);
#endif
    // Anything that is not an exact str takes the generic hashing path.
    if (b->cls != str_cls)
        return hashUnboxed(b);

    return strHashUnboxed(static_cast<BoxedString*>(b));
}
// Equality functor: compares lhs and rhs with PyObject_RichCompareBool(..., Py_EQ).
// A result of -1 is turned into a C++ exception via throwCAPIException().
bool PyEq::operator()(Box* lhs, Box* rhs) const {
#if EXPENSIVE_STAT_TIMERS
    ScopedStatTimer _st(pyeq_timer_counter, 10);
#endif
    const int cmp = PyObject_RichCompareBool(lhs, rhs, Py_EQ);
    if (cmp == -1)
        throwCAPIException();
    return cmp != 0;
}
// Less-than functor: compares lhs and rhs with PyObject_RichCompareBool(..., Py_LT).
// A result of -1 is turned into a C++ exception via throwCAPIException().
bool PyLt::operator()(Box* lhs, Box* rhs) const {
#if EXPENSIVE_STAT_TIMERS
    ScopedStatTimer _st(pylt_timer_counter, 10);
#endif
    const int cmp = PyObject_RichCompareBool(lhs, rhs, Py_LT);
    if (cmp == -1)
        throwCAPIException();
    return cmp != 0;
}
extern "C" Box* deopt(AST_expr* expr, Box* value) {
STAT_TIMER(t0, "us_timer_deopt", 10);
......
......@@ -55,33 +55,33 @@ namespace pyston {
BoxedString* EmptyString;
BoxedString* characters[UCHAR_MAX + 1];
// Builds a string from the first n bytes of s and writes a NUL terminator after them.
// s must be non-null, and n must not equal llvm::StringRef::npos.
// NOTE(review): two variants of the signature line (without / with the hash(-1) initializer)
// and of the copy call (memmove / memcpy) are listed back to back — this looks like the
// before and after lines of a change shown together; confirm which pair is meant to remain.
// NOTE(review): assumes the storage behind data() has room for n + 1 bytes — TODO confirm
// against the allocating operator new.
BoxedString::BoxedString(const char* s, size_t n) : interned_state(SSTATE_NOT_INTERNED) {
BoxedString::BoxedString(const char* s, size_t n) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
assert(s);
RELEASE_ASSERT(n != llvm::StringRef::npos, "");
memmove(data(), s, n);
memcpy(data(), s, n);
data()[n] = 0;
}
// Builds the concatenation lhs + rhs: lhs is copied to the start of data(), rhs directly
// after it, followed by a NUL terminator. The combined size must not equal
// llvm::StringRef::npos.
// NOTE(review): two variants of the signature line (without / with the hash(-1) initializer)
// and of the copy calls (memmove / memcpy) are listed back to back — this looks like the
// before and after lines of a change shown together; confirm which set is meant to remain.
BoxedString::BoxedString(llvm::StringRef lhs, llvm::StringRef rhs) : interned_state(SSTATE_NOT_INTERNED) {
BoxedString::BoxedString(llvm::StringRef lhs, llvm::StringRef rhs) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(lhs.size() + rhs.size() != llvm::StringRef::npos, "");
memmove(data(), lhs.data(), lhs.size());
memmove(data() + lhs.size(), rhs.data(), rhs.size());
memcpy(data(), lhs.data(), lhs.size());
memcpy(data() + lhs.size(), rhs.data(), rhs.size());
data()[lhs.size() + rhs.size()] = 0;
}
// Builds a string holding a copy of s, followed by a NUL terminator. s.size() must not
// equal llvm::StringRef::npos.
// NOTE(review): two variants of the signature line (without / with the hash(-1) initializer)
// and of the copy call (memmove / memcpy) are listed back to back — this looks like the
// before and after lines of a change shown together; confirm which pair is meant to remain.
BoxedString::BoxedString(llvm::StringRef s) : interned_state(SSTATE_NOT_INTERNED) {
BoxedString::BoxedString(llvm::StringRef s) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(s.size() != llvm::StringRef::npos, "");
memmove(data(), s.data(), s.size());
memcpy(data(), s.data(), s.size());
data()[s.size()] = 0;
}
// Builds a string of n copies of the byte c, followed by a NUL terminator. n must not
// equal llvm::StringRef::npos.
// NOTE(review): two variants of the signature line (without / with the hash(-1) initializer)
// are listed back to back — this looks like the before and after lines of a change shown
// together; confirm which one is meant to remain.
BoxedString::BoxedString(size_t n, char c) : interned_state(SSTATE_NOT_INTERNED) {
BoxedString::BoxedString(size_t n, char c) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(n != llvm::StringRef::npos, "");
memset(data(), c, n);
data()[n] = 0;
}
BoxedString::BoxedString(size_t n) : interned_state(SSTATE_NOT_INTERNED) {
BoxedString::BoxedString(size_t n) : hash(-1), interned_state(SSTATE_NOT_INTERNED) {
RELEASE_ASSERT(n != llvm::StringRef::npos, "");
// Note: no memset. add the null-terminator for good measure though
// (CPython does the same thing).
......@@ -1586,13 +1586,15 @@ extern "C" size_t strHashUnboxed(BoxedString* self) {
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
if (self->hash != -1)
return self->hash;
long len = Py_SIZE(self);
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
*/
if (len == 0) {
self->hash = 0;
return 0;
}
p = self->s().data();
......@@ -1604,7 +1606,7 @@ extern "C" size_t strHashUnboxed(BoxedString* self) {
x ^= _Py_HashSecret.suffix;
if (x == -1)
x = -2;
self->hash = x;
return x;
}
......@@ -1662,6 +1664,11 @@ Box* _strSlice(BoxedString* self, i64 start, i64 stop, i64 step, i64 length) {
if (length == 0)
return EmptyString;
if (length == 1) {
char c = self->s()[start];
return characters[c & UCHAR_MAX];
}
BoxedString* bs = BoxedString::createUninitializedString(length);
copySlice(bs->data(), s.data(), start, step, length);
return bs;
......@@ -2531,19 +2538,18 @@ extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
if (newsize < s->size()) {
// XXX resize the box (by reallocating) smaller if it makes sense
s->ob_size = newsize;
s->hash = -1; /* invalidate cached hash value */
s->data()[newsize] = 0;
return 0;
}
BoxedString* resized;
if (s->cls == str_cls)
resized = new (newsize) BoxedString(newsize, 0); // we need an uninitialized string, but this will memset
resized = BoxedString::createUninitializedString(newsize);
else
resized = new (s->cls, newsize)
BoxedString(newsize, 0); // we need an uninitialized string, but this will memset
memmove(resized->data(), s->data(), s->size());
resized = BoxedString::createUninitializedString(s->cls, newsize);
memcpy(resized->data(), s->data(), s->size());
resized->data()[newsize] = 0;
*pv = resized;
return 0;
}
......
......@@ -175,6 +175,12 @@ extern "C" void printFloat(double d);
Box* objectStr(Box*);
Box* objectRepr(Box*);
void checkAndThrowCAPIException();
void throwCAPIException() __attribute__((noreturn));
void ensureCAPIExceptionSet();
struct ExcInfo;
void setCAPIException(const ExcInfo& e);
// In Pyston, this is the same type as CPython's PyTypeObject (they are interchangeable, but we
// use BoxedClass in Pyston wherever possible as a convention).
class BoxedClass : public BoxVar {
......@@ -387,6 +393,7 @@ public:
// optimizations and inlining, creating a new one each time shouldn't have any cost.
llvm::StringRef s() const { return llvm::StringRef(s_data, ob_size); };
long hash; // -1 means not yet computed
char interned_state;
char* data() { return s_data; }
......@@ -430,6 +437,7 @@ public:
// creates an uninitialized string of length n; useful for directly constructing into the string and avoiding
// copies:
static BoxedString* createUninitializedString(ssize_t n) { return new (n) BoxedString(n); }
// Overload of the above that additionally passes cls to the allocating operator new
// (presumably so the result is an instance of that class, e.g. a str subclass — confirm).
static BoxedString* createUninitializedString(BoxedClass* cls, ssize_t n) { return new (cls, n) BoxedString(n); }
// Gets a writeable pointer to the contents of a string.
// Is only meant to be used with something just created from createUninitializedString(), though
......@@ -447,6 +455,7 @@ private:
};
extern "C" size_t strHashUnboxed(BoxedString* self);
extern "C" int64_t hashUnboxed(Box* obj);
class BoxedInstanceMethod : public Box {
public:
......@@ -678,15 +687,33 @@ static_assert(offsetof(BoxedTuple, elts) == offsetof(PyTupleObject, ob_item), ""
extern BoxedString* characters[UCHAR_MAX + 1];
// Hash functor over Box*: exact str objects use their string hash, everything else goes
// through hashUnboxed().
// NOTE(review): both a bare declaration of operator() and an inline definition are listed
// here — this looks like the before and after lines of a change shown together; only one of
// them can remain in a real header.
struct PyHasher {
size_t operator()(Box*) const;
size_t operator()(Box* b) const {
if (b->cls == str_cls) {
auto s = static_cast<BoxedString*>(b);
// Fast path: reuse the hash stored on the string object (-1 means not yet computed).
if (s->hash != -1)
return s->hash;
return strHashUnboxed(s);
}
return hashUnboxed(b);
}
};
// Equality functor over Box*, implemented with PyObject_RichCompareBool(..., Py_EQ).
// NOTE(review): both a bare declaration of operator() and an inline definition are listed
// here — this looks like the before and after lines of a change shown together; only one of
// them can remain in a real header.
struct PyEq {
bool operator()(Box*, Box*) const;
bool operator()(Box* lhs, Box* rhs) const {
int r = PyObject_RichCompareBool(lhs, rhs, Py_EQ);
// -1 signals a failed comparison; throwCAPIException() is declared noreturn.
if (r == -1)
throwCAPIException();
return (bool)r;
}
};
// Less-than functor over Box*, implemented with PyObject_RichCompareBool(..., Py_LT).
// NOTE(review): both a bare declaration of operator() and an inline definition are listed
// here — this looks like the before and after lines of a change shown together; only one of
// them can remain in a real header.
struct PyLt {
bool operator()(Box*, Box*) const;
bool operator()(Box* lhs, Box* rhs) const {
int r = PyObject_RichCompareBool(lhs, rhs, Py_LT);
// -1 signals a failed comparison; throwCAPIException() is declared noreturn.
if (r == -1)
throwCAPIException();
return (bool)r;
}
};
// llvm::DenseMap doesn't store the original hash values, choosing to instead
......@@ -1077,12 +1104,6 @@ AST* unboxAst(Box* b);
// Our default for tp_alloc:
extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems) noexcept;
void checkAndThrowCAPIException();
void throwCAPIException() __attribute__((noreturn));
void ensureCAPIExceptionSet();
struct ExcInfo;
void setCAPIException(const ExcInfo& e);
#define fatalOrError(exception, message) \
do { \
if (CONTINUE_AFTER_FATAL) \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment