Commit 0aa5dea8 authored by Kevin Modzelewski

Go back to the free-classes-after-instances approach

I think this is actually pretty decent, as long as we don't
depend on the class's gc pointers being dereferenceable.
We could go back to having the finalization-ordering algorithm
handle this part, but it gets tricky when the class can refer
back to instances of itself (the situation that prompted this).
parent 4d548d3b
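
To illustrate the approach, here is a minimal standalone C++ sketch of the ordering logic this commit implements. Obj, Class, is_class, setFree, sweep, and freeDeferredClasses are hypothetical stand-ins (roughly Box, BoxedClass, PyType_Check, and Heap::_setFree in the real code), not the actual Pyston API:

#include <unordered_set>
#include <vector>

// Hypothetical stand-ins for Box / BoxedClass:
struct Class;
struct Obj {
    Class* cls;            // the object's class (for a class, its metaclass)
    bool is_class = false; // stand-in for PyType_Check()
};
struct Class : Obj {};

// Stand-in for Heap::_setFree: return the memory to the allocator without
// running any Python-level deallocation semantics.
void setFree(Obj* o) {}

// Sweep: free unmarked instances immediately, but defer unmarked classes so
// that every instance is freed before its class, since the freeing logic can
// look at the class object.
void sweep(const std::vector<Obj*>& unmarked, std::vector<Class*>& classes_to_free) {
    for (Obj* o : unmarked) {
        if (o->is_class)
            classes_to_free.push_back(static_cast<Class*>(o));
        else
            setFree(o);
    }
}

// Then free the deferred classes, holding back any class that is the
// metaclass of another class being freed; only one level of the hierarchy
// is released per collection, and the next collection frees the rest.
void freeDeferredClasses(const std::vector<Class*>& classes_to_free) {
    std::unordered_set<Class*> keep_for_now;
    for (Class* c : classes_to_free)
        keep_for_now.insert(c->cls);
    for (Class* c : classes_to_free)
        if (!keep_for_now.count(c))
            setFree(c);
}

The key constraint is only that the class's memory stays readable while its instances are swept; nothing here dereferences the class's own gc pointers, which is why the deferred classes can safely sit in the vector after the rest of the sweep.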
@@ -17,6 +17,7 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <llvm/ADT/DenseSet.h>
#include "asm_writing/icinfo.h"
#include "codegen/ast_interpreter.h"
@@ -753,7 +754,7 @@ static void prepareWeakrefCallbacks(Box* box) {
}
}
static void markPhase() {
void markPhase() {
static StatCounter sc_us("us_gc_mark_phase");
Timer _t("markPhase", /*min_usec=*/10000);
@@ -787,13 +788,13 @@ static void markPhase() {
sc_us.log(us);
}
static void sweepPhase(std::vector<Box*>& weakly_referenced) {
static void sweepPhase(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free) {
static StatCounter sc_us("us_gc_sweep_phase");
Timer _t("sweepPhase", /*min_usec=*/10000);
// we need to use the allocator here because these objects are referenced only here, and calling the weakref
// callbacks could start another gc
global_heap.freeUnmarked(weakly_referenced);
global_heap.freeUnmarked(weakly_referenced, classes_to_free);
long us = _t.end();
sc_us.log(us);
@@ -973,7 +974,15 @@ void runCollection() {
// since the deallocation of other objects (namely, the weakref objects themselves) can affect
// those lists, and we want to see the final versions.
std::vector<Box*> weakly_referenced;
sweepPhase(weakly_referenced);
// Separately keep track of classes that we will be freeing in this collection.
// We want to make sure that any instances get freed before the class itself gets freed,
// since the freeing logic can look at the class object.
// So instead of directly freeing the classes, we stuff them into this vector and then
// free them at the end.
std::vector<BoxedClass*> classes_to_free;
sweepPhase(weakly_referenced, classes_to_free);
// Handle weakrefs in two passes:
// - first, find all of the weakref objects whose callbacks we need to call. we need to iterate
@@ -984,7 +993,25 @@ void runCollection() {
assert(isValidGCObject(o));
GC_TRACE_LOG("%p is weakly referenced\n", o);
prepareWeakrefCallbacks(o);
global_heap.free(GCAllocation::fromUserData(o));
if (PyType_Check(o))
classes_to_free.push_back(static_cast<BoxedClass*>(o));
else
global_heap.free(GCAllocation::fromUserData(o));
}
// We want to make sure that classes get freed before their metaclasses.
// Use a simple approach of only freeing one level of the hierarchy and then
// letting the next collection do the next one.
llvm::DenseSet<BoxedClass*> classes_to_not_free;
for (auto b : classes_to_free) {
classes_to_not_free.insert(b->cls);
}
for (auto b : classes_to_free) {
if (classes_to_not_free.count(b))
continue;
global_heap._setFree(GCAllocation::fromUserData(b));
}
global_heap.cleanupAfterCollection();
......
@@ -45,7 +45,7 @@ template <> void return_temporary_buffer<pyston::Box*>(pyston::Box** p) {
namespace pyston {
namespace gc {
bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced);
bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced, std::vector<BoxedClass*>* classes_to_free);
// lots of linked lists around here, so let's just use template functions for operations on them.
template <class ListT> inline void nullNextPrev(ListT* node) {
@@ -88,7 +88,8 @@ template <class ListT, typename Func> inline void forEach(ListT* list, Func func) {
}
template <class ListT, typename Free>
inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, Free free_func) {
inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free,
Free free_func) {
auto cur = head;
while (cur) {
GCAllocation* al = cur->data;
@@ -97,7 +98,7 @@ inline void sweepList(ListT* head, std::vector<Box*>& weakly_referenced, Free free_func) {
clearMark(al);
cur = cur->next;
} else {
if (_doFree(al, &weakly_referenced)) {
if (_doFree(al, &weakly_referenced, &classes_to_free)) {
removeFromLL(cur);
auto to_free = cur;
@@ -128,12 +129,7 @@ void _bytesAllocatedTripped() {
/// Finalizers
bool hasOrderedFinalizer(BoxedClass* cls) {
if (PyType_FastSubclass(cls, Py_TPFLAGS_TYPE_SUBCLASS)) {
// Class objects need to be kept alive for at least as long as any objects that point
// to them, even if those objects are garbage (or involved in finalizer chains).
// Marking class objects as having finalizers should get us this behavior.
return true;
} else if (cls->has_safe_tp_dealloc) {
if (cls->has_safe_tp_dealloc) {
ASSERT(!cls->tp_del, "class \"%s\" with safe tp_dealloc also has tp_del?", cls->tp_name);
return false;
} else if (cls->hasNonDefaultTpDealloc()) {
@@ -164,7 +160,8 @@ __attribute__((always_inline)) bool isWeaklyReferenced(Box* b) {
Heap global_heap;
__attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
__attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced,
std::vector<BoxedClass*>* classes_to_free) {
static StatCounter gc_safe_destructors("gc_safe_destructor_calls");
#ifndef NVALGRIND
@@ -195,6 +192,13 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
ASSERT(!hasOrderedFinalizer(b->cls) || hasFinalized(al), "%s", getTypeName(b));
// Note: do this check after the weakrefs check.
if (PyType_Check(b)) {
assert(classes_to_free);
classes_to_free->push_back(static_cast<BoxedClass*>(b));
return false;
}
if (b->cls->tp_dealloc != dealloc_null && b->cls->has_safe_tp_dealloc) {
GC_TRACE_LOG("running safe destructor for %p\n", b);
gc_safe_destructors.log();
@@ -211,7 +215,7 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>* weakly_referenced) {
}
void Heap::destructContents(GCAllocation* al) {
_doFree(al, NULL);
_doFree(al, NULL, NULL);
}
struct HeapStatistics {
@@ -484,10 +488,10 @@ void SmallArena::forEachReference(std::function<void(GCAllocation*, size_t)> f) {
}
}
void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free) {
assertConsistent();
thread_caches.forEachValue([this, &weakly_referenced](ThreadBlockCache* cache) {
thread_caches.forEachValue([this, &weakly_referenced, &classes_to_free](ThreadBlockCache* cache) {
// Iterate over the buckets from largest to smallest. I don't think it really matters, but
// doing it in this order means that we will tend to free types early in the sweep (since they
// are fairly large), and we are more likely to detect if other code depended on that type
@@ -511,8 +515,8 @@ void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
insertIntoLL(&heads[bidx], h);
}
Block** chain_end = _freeChain(&cache->cache_free_heads[bidx], weakly_referenced);
_freeChain(&cache->cache_full_heads[bidx], weakly_referenced);
Block** chain_end = _freeChain(&cache->cache_free_heads[bidx], weakly_referenced, classes_to_free);
_freeChain(&cache->cache_full_heads[bidx], weakly_referenced, classes_to_free);
while (Block* b = cache->cache_full_heads[bidx]) {
removeFromLLAndNull(b);
@@ -522,8 +526,8 @@ void SmallArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
});
for (int bidx = NUM_BUCKETS - 1; bidx >= 0; bidx--) {
Block** chain_end = _freeChain(&heads[bidx], weakly_referenced);
_freeChain(&full_heads[bidx], weakly_referenced);
Block** chain_end = _freeChain(&heads[bidx], weakly_referenced, classes_to_free);
_freeChain(&full_heads[bidx], weakly_referenced, classes_to_free);
while (Block* b = full_heads[bidx]) {
removeFromLLAndNull(b);
@@ -550,7 +554,8 @@ void SmallArena::getStatistics(HeapStatistics* stats) {
}
SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weakly_referenced) {
SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weakly_referenced,
std::vector<BoxedClass*>& classes_to_free) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
@@ -575,7 +580,7 @@ SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weakly_referenced) {
if (isMarked(al)) {
clearMark(al);
} else {
if (_doFree(al, &weakly_referenced)) {
if (_doFree(al, &weakly_referenced, &classes_to_free)) {
// GC_TRACE_LOG("freeing %p\n", al->user_data);
b->isfree.set(atom_idx);
#ifndef NDEBUG
@@ -814,8 +819,8 @@ void LargeArena::cleanupAfterCollection() {
lookup.clear();
}
void LargeArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
sweepList(head, weakly_referenced, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
void LargeArena::freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free) {
sweepList(head, weakly_referenced, classes_to_free, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
}
void LargeArena::getStatistics(HeapStatistics* stats) {
@@ -1023,8 +1028,8 @@ void HugeArena::cleanupAfterCollection() {
lookup.clear();
}
void HugeArena::freeUnmarked(std::vector<Box*>& weakly_referenced) {
sweepList(head, weakly_referenced, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
void HugeArena::freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free) {
sweepList(head, weakly_referenced, classes_to_free, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
void HugeArena::getStatistics(HeapStatistics* stats) {
......
@@ -272,7 +272,7 @@ public:
void free(GCAllocation* al);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked(std::vector<Box*>& weakly_referenced);
void freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free);
void getStatistics(HeapStatistics* stats);
@@ -414,7 +414,7 @@ private:
Block* _allocBlock(uint64_t size, Block** prev);
GCAllocation* _allocFromBlock(Block* b);
Block* _claimBlock(size_t rounded_size, Block** free_head);
Block** _freeChain(Block** head, std::vector<Box*>& weakly_referenced);
Block** _freeChain(Block** head, std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free);
void _getChainStatistics(HeapStatistics* stats, Block** head);
GCAllocation* _alloc(size_t bytes, int bucket_idx);
@@ -496,7 +496,7 @@ public:
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked(std::vector<Box*>& weakly_referenced);
void freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free);
void getStatistics(HeapStatistics* stats);
@@ -517,7 +517,7 @@ public:
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked(std::vector<Box*>& weakly_referenced);
void freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free);
void getStatistics(HeapStatistics* stats);
@@ -623,18 +623,7 @@ public:
void free(GCAllocation* alloc) {
destructContents(alloc);
if (large_arena.contains(alloc)) {
large_arena.free(alloc);
return;
}
if (huge_arena.contains(alloc)) {
huge_arena.free(alloc);
return;
}
assert(small_arena.contains(alloc));
small_arena.free(alloc);
_setFree(alloc);
}
// not thread safe:
@@ -654,10 +643,10 @@ public:
void forEachSmallArenaReference(std::function<void(GCAllocation*, size_t)> f) { small_arena.forEachReference(f); }
// not thread safe:
void freeUnmarked(std::vector<Box*>& weakly_referenced) {
small_arena.freeUnmarked(weakly_referenced);
large_arena.freeUnmarked(weakly_referenced);
huge_arena.freeUnmarked(weakly_referenced);
void freeUnmarked(std::vector<Box*>& weakly_referenced, std::vector<BoxedClass*>& classes_to_free) {
small_arena.freeUnmarked(weakly_referenced, classes_to_free);
large_arena.freeUnmarked(weakly_referenced, classes_to_free);
huge_arena.freeUnmarked(weakly_referenced, classes_to_free);
}
void prepareForCollection() {
@@ -673,6 +662,27 @@
}
void dumpHeapStatistics(int level);
private:
// Internal function that just marks the allocation as being freed, without doing any
// Python-semantics on it.
void _setFree(GCAllocation* alloc) {
if (large_arena.contains(alloc)) {
large_arena.free(alloc);
return;
}
if (huge_arena.contains(alloc)) {
huge_arena.free(alloc);
return;
}
assert(small_arena.contains(alloc));
small_arena.free(alloc);
}
friend void markPhase();
friend void runCollection();
};
extern Heap global_heap;
......