Commit e70f6d88 authored by Rudi Chen's avatar Rudi Chen

Fix class-freed-before-instance-bug.

In some rare instances, class objects can be freed before the last
instance of that class, causing a problem in the sweep phase where
we look at the class of the object being freed.

So we keep unreachable classes around for an extra collection to be
safe.
parent 624bb03b
......@@ -586,6 +586,9 @@ extern "C" PyObject* PystonType_GenericAlloc(BoxedClass* cls, Py_ssize_t nitems)
assert(default_cls->is_pyston_class); \
assert(default_cls->attrs_offset == 0); \
\
/* Don't allocate classes through this -- we need to keep track of all class objects. */ \
assert(default_cls != type_cls); \
\
/* note: we want to use size instead of tp_basicsize, since size is a compile-time constant */ \
void* mem = gc_alloc(size, gc::GCKind::PYTHON); \
assert(mem); \
......
......@@ -42,6 +42,9 @@ FILE* trace_fp;
static std::unordered_set<void*> roots;
static std::vector<std::pair<void*, void*>> potential_root_ranges;
// BoxedClasses in the program that are still needed.
static std::unordered_set<BoxedClass*> class_objects;
static std::unordered_set<void*> nonheap_roots;
// Track the highest-addressed nonheap root; the assumption is that the nonheap roots will
// typically all have lower addresses than the heap roots, so this can serve as a cheap
......@@ -126,8 +129,12 @@ public:
pop_chunk();
assert(cur == end);
return *--cur; // no need for any bounds checks here since we're guaranteed we're CHUNK_SIZE from the start
} else {
// We emptied the stack, but we should prepare a new chunk in case another item
// gets added onto the stack.
get_chunk();
return NULL;
}
return NULL;
}
......@@ -200,7 +207,7 @@ bool isValidGCObject(void* p) {
return al->user_data == p && (al->kind_id == GCKind::CONSERVATIVE_PYTHON || al->kind_id == GCKind::PYTHON);
}
void setIsPythonObject(Box* b) {
void registerPythonObject(Box* b) {
assert(isValidGCMemory(b));
auto al = GCAllocation::fromUserData(b);
......@@ -209,6 +216,11 @@ void setIsPythonObject(Box* b) {
} else {
assert(al->kind_id == GCKind::PYTHON);
}
assert(b->cls);
if (PyType_Check(b)) {
class_objects.insert((BoxedClass*)b);
}
}
GCRootHandle::GCRootHandle() {
......@@ -276,7 +288,7 @@ void GCVisitor::visitPotentialRange(void* const* start, void* const* end) {
}
}
static inline void visitByGCKind(void* p, GCVisitor& visitor) {
static __attribute__((always_inline)) void visitByGCKind(void* p, GCVisitor& visitor) {
assert(((intptr_t)p) % 8 == 0);
GCAllocation* al = GCAllocation::fromUserData(p);
......@@ -310,7 +322,7 @@ static inline void visitByGCKind(void* p, GCVisitor& visitor) {
}
}
static void getMarkPhaseRoots(GCVisitor& visitor) {
static void markRoots(GCVisitor& visitor) {
GC_TRACE_LOG("Looking at the stack\n");
threading::visitAllStacks(&visitor);
......@@ -375,10 +387,39 @@ static void markPhase() {
TraceStack stack(roots);
GCVisitor visitor(&stack);
getMarkPhaseRoots(visitor);
markRoots(visitor);
graphTraversalMarking(stack, visitor);
// Some classes might be unreachable. Unfortunately, we have to keep them around for
// one more collection, because during the sweep phase, instances of unreachable
// classes might still end up looking at the class. So we visit those unreachable
// classes remove them from the list of class objects so that it can be freed
// in the next collection.
std::vector<BoxedClass*> classes_to_remove;
for (BoxedClass* cls : class_objects) {
GCAllocation* al = GCAllocation::fromUserData(cls);
if (!isMarked(al)) {
visitor.visit(cls);
classes_to_remove.push_back(cls);
}
}
// We added new objects to the stack again from visiting classes so we nee to do
// another (mini) traversal.
graphTraversalMarking(stack, visitor);
for (BoxedClass* cls : classes_to_remove) {
class_objects.erase(cls);
}
// The above algorithm could fail if we have a class and a metaclass -- they might
// both have been added to the classes to remove. In case that happens, make sure
// that the metaclass is retained for at least another collection.
for (BoxedClass* cls : classes_to_remove) {
class_objects.insert(cls->cls);
}
#if TRACE_GC_MARKING
fclose(trace_fp);
trace_fp = NULL;
......
......@@ -71,7 +71,7 @@ void enableGC();
bool isValidGCMemory(void* p); // if p is a valid gc-allocated pointer (or a non-heap root)
bool isValidGCObject(void* p); // whether p is valid gc memory and is set to have Python destructor semantics applied
bool isNonheapRoot(void* p);
void setIsPythonObject(Box* b);
void registerPythonObject(Box* b);
// Debugging/validation helpers: if a GC should not happen in certain sections (ex during unwinding),
// use these functions to mark that. This is different from disableGC/enableGC, since it causes an
......
......@@ -117,6 +117,7 @@ void _bytesAllocatedTripped() {
return;
threading::GLPromoteRegion _lock;
runCollection();
}
......@@ -140,6 +141,7 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>*
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
assert(b->cls);
if (PyType_SUPPORTS_WEAKREFS(b->cls)) {
PyWeakReference** list = (PyWeakReference**)PyObject_GET_WEAKREFS_LISTPTR(b);
if (list && *list) {
......
......@@ -2756,11 +2756,11 @@ extern "C" PyVarObject* PyObject_InitVar(PyVarObject* op, PyTypeObject* tp, Py_s
assert(gc::isValidGCMemory(op));
assert(gc::isValidGCObject(tp));
gc::setIsPythonObject(op);
Py_TYPE(op) = tp;
op->ob_size = size;
gc::registerPythonObject(op);
return op;
}
......@@ -2771,10 +2771,10 @@ extern "C" PyObject* PyObject_Init(PyObject* op, PyTypeObject* tp) noexcept {
assert(gc::isValidGCMemory(op));
assert(gc::isValidGCObject(tp));
gc::setIsPythonObject(op);
Py_TYPE(op) = tp;
gc::registerPythonObject(op);
if (PyType_SUPPORTS_WEAKREFS(tp)) {
*PyObject_GET_WEAKREFS_LISTPTR(op) = NULL;
}
......
import gc
# Dynamically create new classes and instances of those classes in such a way
# that both the class object and the instance object will be freed in the same
# garbage collection pass. Hope that this doesn't cause any problems.
def generateClassAndInstances():
for i in xrange(5000):
def method(self, x):
return x + self.i
NewType1 = type("Class1_" + str(i), (),
dict(a={}, b=range(10), i=1, f=method))
NewType2 = type("Class2_" + str(i), (object,),
dict(a={}, b=range(10), i=2, f=method))
NewType3 = type("Class3_" + str(i), (NewType2,), {})
NewType4 = type("Class4_" + str(i), (NewType3,), {})
NewType5 = type("Class5_" + str(i), (NewType4,), {})
obj1 = NewType1()
obj2 = NewType2()
obj3 = NewType3()
obj4 = NewType4()
obj5 = NewType5()
generateClassAndInstances()
gc.collect()
gc.collect()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment