Commit 36bac9a5 authored by Kevin Modzelewski

Get dict ordering closer to CPython's

Rather than switching to CPython's dict implementation, which would
require quite a lot of code changes (since we use our internal dict interface
a lot), change our current dict implementation to more closely match CPython's.

In particular, change the growth strategy and probing strategy to match CPython's.

I'm not sure how I feel about this -- there are still a number of differences.
In theory this is still an improvement over the current status quo (fewer ordering
differences), but I don't know if we need to go all the way and make sure to have
zero ordering differences.
parent 00d51fe1
......@@ -31,6 +31,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// The following Pyston changes were made:
// - Hash codes are now size_t (64bit) instead of unsigned (32bit)
// - The map uses the Python allocator
// - The initial size is now configurable
// - The probing policy has been changed from quadratic to CPython's "perturb" system
// - The growth policy has been changed to match CPython's
// (quadruple until 50k, then double; max fill factor 2/3)
#ifndef PYSTON_CORE_FROMLLVM_DENSEMAP_H
#define PYSTON_CORE_FROMLLVM_DENSEMAP_H
......@@ -43,7 +51,6 @@
#include <new>
#include <utility>
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
......@@ -53,6 +60,8 @@
// Pyston change: use the python allocator
#include "Python.h"
#include "core/from_llvm/DenseMapInfo.h"
namespace pyston {
// This should only take effect for this header file:
......@@ -367,11 +376,11 @@ protected:
std::swap(getNumTombstones(), RHS.getNumTombstones());
}
static unsigned getHashValue(const KeyT &Val) {
static size_t getHashValue(const KeyT &Val) {
return KeyInfoT::getHashValue(Val);
}
template<typename LookupKeyT>
static unsigned getHashValue(const LookupKeyT &Val) {
static size_t getHashValue(const LookupKeyT &Val) {
return KeyInfoT::getHashValue(Val);
}
static const KeyT getEmptyKey() {
......@@ -469,8 +478,8 @@ private:
// causing infinite loops in lookup.
unsigned NewNumEntries = getNumEntries() + 1;
unsigned NumBuckets = getNumBuckets();
if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
this->grow(NumBuckets * 2);
if (LLVM_UNLIKELY(NewNumEntries * 3 >= NumBuckets * 2)) {
this->grow(NumBuckets * (NumBuckets > 50000 ? 2 : 4));
LookupBucketFor(Key, TheBucket);
NumBuckets = getNumBuckets();
} else if (LLVM_UNLIKELY(NumBuckets-(NewNumEntries+getNumTombstones()) <=
......@@ -515,10 +524,14 @@ private:
!KeyInfoT::isEqual(Val, TombstoneKey) &&
"Empty/Tombstone value shouldn't be inserted into map!");
unsigned BucketNo = getHashValue(Val) & (NumBuckets-1);
unsigned ProbeAmt = 1;
size_t mask = (NumBuckets - 1);
size_t perturb = getHashValue(Val);
size_t i = perturb & mask;
static_assert(sizeof(perturb) == sizeof(void*), "");
while (1) {
const BucketT *ThisBucket = BucketsPtr + BucketNo;
const BucketT *ThisBucket = BucketsPtr + (i & mask);
// Found Val's bucket? If so, return it.
if (LLVM_LIKELY(KeyInfoT::isEqual(Val, ThisBucket->getFirst()))) {
FoundBucket = ThisBucket;
......@@ -542,8 +555,8 @@ private:
// Otherwise, it's a hash collision or a tombstone, continue probing using
// CPython's "perturb" scheme.
BucketNo += ProbeAmt++;
BucketNo &= (NumBuckets-1);
i = (i << 2) + i + perturb + 1;
perturb >>= 5;
}
}
......
// This file was copied from https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/ADT/DenseMapInfo.h?p=230300
// and came with the following license:
//===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines DenseMapInfo traits for DenseMap.
//
//===----------------------------------------------------------------------===//
// Modifications were made for Pyston, using the following license:
// Copyright (c) 2014-2016 Dropbox, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The following Pyston changes were made:
// - Hash codes are now size_t (64bit) instead of unsigned (32bit)
// - The map uses the Python allocator
// - The initial size is now configurable
// - The probing policy has been changed from quadratic to CPython's "perturb" system
// - The growth policy has been changed to match CPython's
// (quadruple until 50k, then double; max fill factor 2/3)
#ifndef PYSTON_CORE_FROMLLVM_DENSEMAPINFO_H
#define PYSTON_CORE_FROMLLVM_DENSEMAPINFO_H
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
namespace pyston {
// This should only take effect for this header file:
using namespace llvm;
// Primary template: deliberately empty. A key type is expected to supply a
// specialization providing the four static members sketched in the comments
// below (the specializations in this file all follow that shape).
template<typename T>
struct DenseMapInfo {
//static inline T getEmptyKey();
//static inline T getTombstoneKey();
//static size_t getHashValue(const T &Val);
//static bool isEqual(const T &LHS, const T &RHS);
};
// DenseMapInfo specialization covering every raw pointer type.
template<typename T>
struct DenseMapInfo<T*> {
  // Sentinel for an empty bucket: the all-ones bit pattern shifted left by
  // the number of low bits PointerLikeTypeTraits reports as available.
  static inline T* getEmptyKey() {
    const uintptr_t Sentinel = static_cast<uintptr_t>(-1)
                               << PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
    return reinterpret_cast<T*>(Sentinel);
  }

  // Sentinel for a tombstone bucket: same construction, starting from -2.
  static inline T* getTombstoneKey() {
    const uintptr_t Sentinel = static_cast<uintptr_t>(-2)
                               << PointerLikeTypeTraits<T*>::NumLowBitsAvailable;
    return reinterpret_cast<T*>(Sentinel);
  }

  // Hashes the address by XOR-ing two right-shifted copies of it (by 4 and
  // by 9 bits).
  static size_t getHashValue(const T *PtrVal) {
    const size_t Addr = static_cast<size_t>(reinterpret_cast<uintptr_t>(PtrVal));
    return (Addr >> 4) ^ (Addr >> 9);
  }

  // Keys are equal exactly when the pointers are.
  static bool isEqual(const T *LHS, const T *RHS) {
    return LHS == RHS;
  }
};
// DenseMapInfo specialization for char keys.
template<> struct DenseMapInfo<char> {
  // ~0 (all bits set) is reserved as the empty marker.
  static inline char getEmptyKey() {
    return static_cast<char>(~0);
  }

  // The value just below the empty marker is reserved as the tombstone.
  static inline char getTombstoneKey() {
    return static_cast<char>(~0 - 1);
  }

  // Multiplicative hash carried out in unsigned arithmetic.
  static size_t getHashValue(const char& Val) {
    const unsigned Widened = static_cast<unsigned>(Val);
    return Widened * 37U;
  }

  static bool isEqual(const char &LHS, const char &RHS) {
    return LHS == RHS;
  }
};
// DenseMapInfo specialization for unsigned int keys.
template<> struct DenseMapInfo<unsigned> {
  // The largest unsigned value marks an empty bucket.
  static inline unsigned getEmptyKey() {
    return static_cast<unsigned>(-1);
  }

  // The second-largest unsigned value marks a tombstone.
  static inline unsigned getTombstoneKey() {
    return static_cast<unsigned>(-2);
  }

  // Multiplicative hash; the product wraps in unsigned arithmetic before it
  // is widened to size_t.
  static size_t getHashValue(const unsigned& Val) {
    const unsigned Scaled = Val * 37U;
    return Scaled;
  }

  static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
    return LHS == RHS;
  }
};
// Provide DenseMapInfo for unsigned longs.
template<> struct DenseMapInfo<unsigned long> {
  // All-ones is reserved as the empty marker.
  static inline unsigned long getEmptyKey() { return ~0UL; }

  // The value just below the empty marker is reserved as the tombstone.
  static inline unsigned long getTombstoneKey() { return ~0UL - 1L; }

  // Multiplicative hash. The product is returned at full size_t width: the
  // hash type is size_t, and narrowing the product to `unsigned` would make
  // keys that differ only above bit 31 hash identically.
  static size_t getHashValue(const unsigned long& Val) {
    return static_cast<size_t>(Val * 37UL);
  }

  static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) {
    return LHS == RHS;
  }
};
// Provide DenseMapInfo for unsigned long longs.
template<> struct DenseMapInfo<unsigned long long> {
  // All-ones is reserved as the empty marker.
  static inline unsigned long long getEmptyKey() { return ~0ULL; }

  // The value just below the empty marker is reserved as the tombstone.
  static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }

  // Multiplicative hash. The product is returned at full size_t width: the
  // hash type is size_t, and narrowing the product to `unsigned` would make
  // keys that differ only above bit 31 hash identically.
  static size_t getHashValue(const unsigned long long& Val) {
    return static_cast<size_t>(Val * 37ULL);
  }

  static bool isEqual(const unsigned long long& LHS,
                      const unsigned long long& RHS) {
    return LHS == RHS;
  }
};
// DenseMapInfo specialization for int keys.
template<> struct DenseMapInfo<int> {
  // 0x7fffffff is reserved as the empty marker.
  static inline int getEmptyKey() {
    return 0x7fffffff;
  }

  // -0x7fffffff - 1 is reserved as the tombstone.
  static inline int getTombstoneKey() {
    return -0x7fffffff - 1;
  }

  // Multiplicative hash performed in unsigned arithmetic, then widened to
  // size_t.
  static size_t getHashValue(const int& Val) {
    const unsigned Scaled = static_cast<unsigned>(Val) * 37U;
    return Scaled;
  }

  static bool isEqual(const int& LHS, const int& RHS) {
    return LHS == RHS;
  }
};
// Provide DenseMapInfo for longs.
template<> struct DenseMapInfo<long> {
  // Every bit set except the top one is reserved as the empty marker.
  static inline long getEmptyKey() {
    return (1UL << (sizeof(long) * 8 - 1)) - 1UL;
  }

  // The value just below the empty marker is reserved as the tombstone.
  static inline long getTombstoneKey() { return getEmptyKey() - 1L; }

  // Multiplicative hash computed in unsigned arithmetic. The product is
  // returned at full size_t width: the hash type is size_t, and narrowing the
  // product to `unsigned` would make keys that differ only above bit 31 hash
  // identically.
  static size_t getHashValue(const long& Val) {
    return static_cast<size_t>(static_cast<unsigned long>(Val) * 37UL);
  }

  static bool isEqual(const long& LHS, const long& RHS) {
    return LHS == RHS;
  }
};
// Provide DenseMapInfo for long longs.
template<> struct DenseMapInfo<long long> {
  // 0x7fffffffffffffff is reserved as the empty marker.
  static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; }

  // -0x7fffffffffffffff - 1 is reserved as the tombstone.
  static inline long long getTombstoneKey() { return -0x7fffffffffffffffLL-1; }

  // Multiplicative hash computed in unsigned arithmetic. The product is
  // returned at full size_t width: the hash type is size_t, and narrowing the
  // product to `unsigned` would make keys that differ only above bit 31 hash
  // identically.
  static size_t getHashValue(const long long& Val) {
    return static_cast<size_t>(static_cast<unsigned long long>(Val) * 37ULL);
  }

  static bool isEqual(const long long& LHS,
                      const long long& RHS) {
    return LHS == RHS;
  }
};
// Provide DenseMapInfo for all pairs whose members have info.
template<typename T, typename U>
struct DenseMapInfo<std::pair<T, U> > {
  typedef std::pair<T, U> Pair;
  typedef DenseMapInfo<T> FirstInfo;
  typedef DenseMapInfo<U> SecondInfo;

  // The empty pair is built from each member's empty key.
  static inline Pair getEmptyKey() {
    return std::make_pair(FirstInfo::getEmptyKey(),
                          SecondInfo::getEmptyKey());
  }

  // The tombstone pair is built from each member's tombstone key.
  static inline Pair getTombstoneKey() {
    return std::make_pair(FirstInfo::getTombstoneKey(),
                          SecondInfo::getTombstoneKey());
  }

  // Combines the two member hashes into one value.
  //
  // The member hashes are size_t, which may be wider than 32 bits. Exactly 32
  // bits are taken from each so the two halves occupy disjoint bit ranges of
  // the 64-bit seed; OR-ing an untruncated second hash over the shifted first
  // hash would overwrite bits contributed by the first member. For member
  // hashes that already fit in 32 bits the result is unchanged.
  static size_t getHashValue(const Pair& PairVal) {
    const uint64_t FirstHalf =
        static_cast<uint32_t>(FirstInfo::getHashValue(PairVal.first));
    const uint64_t SecondHalf =
        static_cast<uint32_t>(SecondInfo::getHashValue(PairVal.second));
    uint64_t key = (FirstHalf << 32) | SecondHalf;
    // Shift/add/xor mixing rounds over the packed seed.
    key += ~(key << 32);
    key ^= (key >> 22);
    key += ~(key << 13);
    key ^= (key >> 8);
    key += (key << 3);
    key ^= (key >> 15);
    key += ~(key << 27);
    key ^= (key >> 31);
    return static_cast<size_t>(key);
  }

  // Pairs are equal when both members compare equal under their own infos.
  static bool isEqual(const Pair &LHS, const Pair &RHS) {
    return FirstInfo::isEqual(LHS.first, RHS.first) &&
           SecondInfo::isEqual(LHS.second, RHS.second);
  }
};
} // end namespace pyston
#endif
......@@ -139,7 +139,7 @@ template <> struct DenseMapInfo<pyston::InternedString> {
return pyston::InternedString((pyston::BoxedString*)-1);
#endif
}
static unsigned getHashValue(const pyston::InternedString& val) { return std::hash<pyston::InternedString>()(val); }
static size_t getHashValue(const pyston::InternedString& val) { return std::hash<pyston::InternedString>()(val); }
static bool isEqual(const pyston::InternedString& lhs, const pyston::InternedString& rhs) { return lhs == rhs; }
};
}
......
# expected: fail
# - we don't order attributes the same way as CPython
import random
random.seed(12345)
def randchr():
return chr(int(random.random() * 26) + ord('a'))
def randstr(n):
return ''.join([randchr() for i in xrange(n)])
d = {}
class C(object):
pass
for i in xrange(20):
setattr(C, "attr_" + randstr(5), i)
for k, v in C.__dict__.items():
if not k.startswith("attr_"):
continue
print k, v
# Adds and removes pseudo-randomly keyed entries in a dict, printing its size,
# repr and items() after each step.  The RNG seed is fixed so the keys are the
# same on every run.
# NOTE(review): indentation was lost in this view, so the extent of the
# function and loop bodies below cannot be read off the text -- confirm
# against the original file before reformatting.
import random
random.seed(12345)
# Returns one pseudo-random lowercase ASCII letter.
def randchr():
return chr(int(random.random() * 26) + ord('a'))
# Returns a string of n pseudo-random lowercase letters.
def randstr(n):
return ''.join([randchr() for i in xrange(n)])
d = {}
# Stores the module-level loop counter i under a fresh random 5-letter key,
# then prints the dict's size, repr and items().
def add():
d[randstr(5)] = i
print len(d)
print d
print d.items()
# Deletes the first key reported by d.keys(), then prints the dict's size,
# repr and items().
def pop():
del d[d.keys()[0]]
print len(d)
print d
print d.items()
for i in xrange(100):
add()
# Snapshot of the values, printed at the very end of the script.
print_final = d.values()
for i in xrange(100):
pop()
add()
pop()
print print_final
# expected: fail
# - we don't order globals the same way as CPython
import random
random.seed(12345)
def randchr():
return chr(int(random.random() * 26) + ord('a'))
def randstr(n):
return ''.join([randchr() for i in xrange(n)])
d = {}
for i in xrange(20):
globals()["attr_" + randstr(5)] = i
for k, v in globals().items():
if not k.startswith("attr_"):
continue
print k, v
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment