Commit ce62f37e authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #771 from kmod/threading_local_debugging

Add some PerThreadSet features
parents f49d0937 273bf3d8
...@@ -144,7 +144,28 @@ template <int N, int... S> struct gens : gens<N - 1, N - 1, S...> {}; ...@@ -144,7 +144,28 @@ template <int N, int... S> struct gens : gens<N - 1, N - 1, S...> {};
template <int... S> struct gens<0, S...> { typedef seq<S...> type; }; template <int... S> struct gens<0, S...> { typedef seq<S...> type; };
} }
template <typename T, typename... CtorArgs> class PerThreadSet { class PerThreadSetBase {
private:
// Registry of every live PerThreadSet instance, consulted by
// runAllForkHandlers() to notify each one after a fork.
// NOTE(review): access to all_instances is unsynchronized -- presumably
// instances are created/destroyed and fork handlers run while effectively
// single-threaded; confirm against the callers.
static std::unordered_set<PerThreadSetBase*> all_instances;
protected:
// Register this instance so it receives post-fork notifications.
PerThreadSetBase() { all_instances.insert(this); }
// Deregister on destruction; the assert catches an instance that was
// never registered (or was already removed).
virtual ~PerThreadSetBase() {
assert(all_instances.count(this));
all_instances.erase(this);
}
// Hook invoked in the child process after a fork; subclasses discard
// per-thread state belonging to threads that no longer exist there.
virtual void onFork() = 0;
public:
// Invoke onFork() on every registered instance.  Called from the
// post-fork reinitialization path (PyEval_ReInitThreads).
static void runAllForkHandlers() {
for (auto inst : all_instances)
inst->onFork();
}
};
template <typename T, typename... CtorArgs> class PerThreadSet : public PerThreadSetBase {
private: private:
pthread_key_t pthread_key; pthread_key_t pthread_key;
PthreadFastMutex lock; PthreadFastMutex lock;
...@@ -159,20 +180,26 @@ private: ...@@ -159,20 +180,26 @@ private:
std::unordered_map<pthread_t, Storage*> map; std::unordered_map<pthread_t, Storage*> map;
std::tuple<CtorArgs...> ctor_args; std::tuple<CtorArgs...> ctor_args;
#ifndef NDEBUG
int map_elts = 0;
#endif
static void dtor(void* val) { static void dtor(void* val) {
Storage* s = static_cast<Storage*>(val); Storage* s = static_cast<Storage*>(val);
assert(s); assert(s);
auto* self = s->self; auto* self = s->self;
LOCK_REGION(&self->lock); LOCK_REGION(&self->lock);
ASSERT(self->map.size() == self->map_elts, "%ld %d", self->map.size(), self->map_elts);
assert(s->my_tid == pthread_self()); assert(s->my_tid == pthread_self());
// I assume this destructor gets called on the same thread
// that this data is bound to:
assert(self->map.count(pthread_self())); assert(self->map.count(pthread_self()));
self->map.erase(pthread_self()); self->map.erase(pthread_self());
#ifndef NDEBUG
self->map_elts--;
#endif
delete s; delete s;
} }
...@@ -185,6 +212,21 @@ private: ...@@ -185,6 +212,21 @@ private:
.val = T(std::get<S>(ctor_args)...) }; .val = T(std::get<S>(ctor_args)...) };
} }
protected:
// Post-fork cleanup: after fork() only the calling thread exists in the
// child, so delete the per-thread Storage of every other (now-dead)
// thread and drop its map entry.
// NOTE(review): runs without taking this->lock -- presumably safe because
// the child is single-threaded immediately after fork; confirm.
void onFork() override {
pthread_t surviving_ptid = pthread_self();
// map.erase(it) returns the next iterator, so only advance manually
// when the current entry is kept.
for (auto it = this->map.begin(), end = this->map.end(); it != end;) {
if (it->first != surviving_ptid) {
delete it->second;
it = this->map.erase(it);
#ifndef NDEBUG
// Keep the debug-only element counter in sync with map.size().
this->map_elts--;
#endif
} else
++it;
}
}
public: public:
PerThreadSet(CtorArgs... ctor_args) : ctor_args(std::forward<CtorArgs>(ctor_args)...) { PerThreadSet(CtorArgs... ctor_args) : ctor_args(std::forward<CtorArgs>(ctor_args)...) {
int code = pthread_key_create(&pthread_key, &dtor); int code = pthread_key_create(&pthread_key, &dtor);
...@@ -214,6 +256,12 @@ public: ...@@ -214,6 +256,12 @@ public:
s = make(typename impl::gens<sizeof...(CtorArgs)>::type()); s = make(typename impl::gens<sizeof...(CtorArgs)>::type());
LOCK_REGION(&lock); LOCK_REGION(&lock);
#ifndef NDEBUG
assert(map.size() == map_elts);
map_elts++;
#endif
int code = pthread_setspecific(pthread_key, s); int code = pthread_setspecific(pthread_key, s);
assert(code == 0); assert(code == 0);
......
...@@ -35,6 +35,8 @@ ...@@ -35,6 +35,8 @@
namespace pyston { namespace pyston {
namespace threading { namespace threading {
std::unordered_set<PerThreadSetBase*> PerThreadSetBase::all_instances;
extern "C" { extern "C" {
__thread PyThreadState cur_thread_state __thread PyThreadState cur_thread_state
= { 0, NULL, NULL, NULL, NULL }; // not sure if we need to explicitly request zero-initialization = { 0, NULL, NULL, NULL, NULL }; // not sure if we need to explicitly request zero-initialization
...@@ -508,7 +510,7 @@ extern "C" void PyEval_ReInitThreads() noexcept { ...@@ -508,7 +510,7 @@ extern "C" void PyEval_ReInitThreads() noexcept {
num_starting_threads = 0; num_starting_threads = 0;
threads_waiting_on_gil = 0; threads_waiting_on_gil = 0;
// TODO we should clean up all created PerThreadSets, such as the one used in the heap for thread-local-caches. PerThreadSetBase::runAllForkHandlers();
} }
void acquireGLWrite() { void acquireGLWrite() {
......
...@@ -113,10 +113,10 @@ private: ...@@ -113,10 +113,10 @@ private:
public: public:
TraceStack(TraceStackType type) : visit_type(type) { get_chunk(); } TraceStack(TraceStackType type) : visit_type(type) { get_chunk(); }
TraceStack(TraceStackType type, const std::unordered_set<void*>& root_handles) : visit_type(type) { TraceStack(TraceStackType type, const std::unordered_set<void*>& roots) : visit_type(type) {
get_chunk(); get_chunk();
for (void* p : root_handles) { for (void* p : roots) {
assert(!isMarked(GCAllocation::fromUserData(p))); ASSERT(!isMarked(GCAllocation::fromUserData(p)), "");
push(p); push(p);
} }
} }
......
...@@ -411,7 +411,37 @@ GCAllocation* SmallArena::allocationFrom(void* ptr) { ...@@ -411,7 +411,37 @@ GCAllocation* SmallArena::allocationFrom(void* ptr) {
return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]); return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
} }
#ifndef NDEBUG
// Debug-only invariant check over all block lists of the small arena
// (per-thread caches plus the shared free/full heads).  Verifies that:
//   - every Block lies inside the small arena's address range,
//   - no Block appears on more than one list (or twice on one list),
//   - the doubly-linked list back-pointers are wired correctly
//     (each node's next->prev points back at our 'next' field).
void SmallArena::assertConsistent() {
std::unordered_set<Block*> seen_blocks;
// Walk one singly-reachable chain starting at 'h', checking each node.
auto scan = [&seen_blocks](Block* h) {
while (h) {
ASSERT(h >= (void*)SMALL_ARENA_START && h < (void*)LARGE_ARENA_START, "%p", h);
// A block on two lists (or a cycle) would be seen twice.
assert(!seen_blocks.count(h));
seen_blocks.insert(h);
if (h->next)
// 'prev' is a pointer-to-pointer: it must point at the
// 'next' field that links to this node.
assert(h->next->prev == &h->next);
h = h->next;
}
};
// Check every per-thread block cache...
thread_caches.forEachValue([&scan](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
scan(cache->cache_free_heads[bidx]);
scan(cache->cache_full_heads[bidx]);
}
});
// ...and the shared per-bucket lists.
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
scan(full_heads[bidx]);
scan(heads[bidx]);
}
}
#endif
void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced) { void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
assertConsistent();
thread_caches.forEachValue([this, &weakly_referenced](ThreadBlockCache* cache) { thread_caches.forEachValue([this, &weakly_referenced](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) { for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx]; Block* h = cache->cache_free_heads[bidx];
......
...@@ -274,6 +274,12 @@ public: ...@@ -274,6 +274,12 @@ public:
void prepareForCollection() {} void prepareForCollection() {}
void cleanupAfterCollection() {} void cleanupAfterCollection() {}
#ifndef NDEBUG
void assertConsistent();
#else
void assertConsistent() {}
#endif
private: private:
template <int N> class Bitmap { template <int N> class Bitmap {
static_assert(N % 64 == 0, ""); static_assert(N % 64 == 0, "");
......
...@@ -17,6 +17,7 @@ if __name__ == '__main__': ...@@ -17,6 +17,7 @@ if __name__ == '__main__':
p = multiprocessing.Process(target=f, args=('bob',)) p = multiprocessing.Process(target=f, args=('bob',))
p.start() p.start()
p.join() p.join()
print p.exitcode
def f(q): def f(q):
...@@ -28,3 +29,5 @@ if __name__ == '__main__': ...@@ -28,3 +29,5 @@ if __name__ == '__main__':
p.start() p.start()
print q.get() # prints "[42, None, 'hello']" print q.get() # prints "[42, None, 'hello']"
p.join() p.join()
print p.exitcode
print "done"
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment