Commit 6010e456 authored by Tim Peters's avatar Tim Peters

SF 1055820: weakref callback vs gc vs threads

In cyclic gc, clear weakrefs to unreachable objects before allowing any
Python code (weakref callbacks or __del__ methods) to run.

This is a critical bugfix, affecting all versions of Python since weakrefs
were introduced.  I'll backport to 2.3.
parent 57c2d6c2
...@@ -9,11 +9,31 @@ extern "C" { ...@@ -9,11 +9,31 @@ extern "C" {
typedef struct _PyWeakReference PyWeakReference; typedef struct _PyWeakReference PyWeakReference;
/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType,
* and CallableProxyType.
*/
struct _PyWeakReference { struct _PyWeakReference {
PyObject_HEAD PyObject_HEAD
/* The object to which this is a weak reference, or Py_None if none.
* Note that this is a stealth reference: wr_object's refcount is
* not incremented to reflect this pointer.
*/
PyObject *wr_object; PyObject *wr_object;
/* A callable to invoke when wr_object dies, or NULL if none. */
PyObject *wr_callback; PyObject *wr_callback;
/* A cache for wr_object's hash code. As usual for hashes, this is -1
* if the hash code isn't known yet.
*/
long hash; long hash;
/* If wr_object is weakly referenced, wr_object has a doubly-linked NULL-
* terminated list of weak references to it. These are the list pointers.
* If wr_object goes away, wr_object is set to Py_None, and these pointers
* have no meaning then.
*/
PyWeakReference *wr_prev; PyWeakReference *wr_prev;
PyWeakReference *wr_next; PyWeakReference *wr_next;
}; };
......
from test.test_support import verify, verbose, TestFailed, vereq from test.test_support import verify, verbose, TestFailed, vereq
import sys import sys
import gc import gc
import weakref
def expect(actual, expected, name): def expect(actual, expected, name):
if actual != expected: if actual != expected:
...@@ -369,6 +370,191 @@ def test_get_referents(): ...@@ -369,6 +370,191 @@ def test_get_referents():
expect(gc.get_referents(1, 'a', 4j), [], "get_referents") expect(gc.get_referents(1, 'a', 4j), [], "get_referents")
# Bug 1055820 has several tests of longstanding bugs involving weakrefs and
# cyclic gc.
# An instance of C1055820 has a self-loop, so becomes cyclic trash when
# unreachable.
class C1055820(object):
def __init__(self, i):
self.i = i
self.loop = self
class GC_Detector(object):
# Create an instance I. Then gc hasn't happened again so long as
# I.gc_happened is false.
def __init__(self):
self.gc_happened = False
def it_happened(ignored):
self.gc_happened = True
# Create a piece of cyclic trash that triggers it_happened when
# gc collects it.
self.wr = weakref.ref(C1055820(666), it_happened)
def test_bug1055820b():
# Corresponds to temp2b.py in the bug report.
ouch = []
def callback(ignored):
ouch[:] = [wr() for wr in WRs]
Cs = [C1055820(i) for i in range(2)]
WRs = [weakref.ref(c, callback) for c in Cs]
c = None
gc.collect()
expect(len(ouch), 0, "bug1055820b")
# Make the two instances trash, and collect again. The bug was that
# the callback materialized a strong reference to an instance, but gc
# cleared the instance's dict anyway.
Cs = None
gc.collect()
expect(len(ouch), 2, "bug1055820b") # else the callbacks didn't run
for x in ouch:
# If the callback resurrected one of these guys, the instance
# would be damaged, with an empty __dict__.
expect(x, None, "bug1055820b")
def test_bug1055820c():
# Corresponds to temp2c.py in the bug report. This is pretty elaborate.
c0 = C1055820(0)
# Move c0 into generation 2.
gc.collect()
c1 = C1055820(1)
c1.keep_c0_alive = c0
del c0.loop # now only c1 keeps c0 alive
c2 = C1055820(2)
c2wr = weakref.ref(c2) # no callback!
ouch = []
def callback(ignored):
ouch[:] = [c2wr()]
# The callback gets associated with a wr on an object in generation 2.
c0wr = weakref.ref(c0, callback)
c0 = c1 = c2 = None
# What we've set up: c0, c1, and c2 are all trash now. c0 is in
# generation 2. The only thing keeping it alive is that c1 points to it.
# c1 and c2 are in generation 0, and are in self-loops. There's a global
# weakref to c2 (c2wr), but that weakref has no callback. There's also
# a global weakref to c0 (c0wr), and that does have a callback, and that
# callback references c2 via c2wr().
#
# c0 has a wr with callback, which references c2wr
# ^
# |
# | Generation 2 above dots
#. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . .
# | Generation 0 below dots
# |
# |
# ^->c1 ^->c2 has a wr but no callback
# | | | |
# <--v <--v
#
# So this is the nightmare: when generation 0 gets collected, we see that
# c2 has a callback-free weakref, and c1 doesn't even have a weakref.
# Collecting generation 0 doesn't see c0 at all, and c0 is the only object
# that has a weakref with a callback. gc clears c1 and c2. Clearing c1
# has the side effect of dropping the refcount on c0 to 0, so c0 goes
# away (despite that it's in an older generation) and c0's wr callback
# triggers. That in turn materializes a reference to c2 via c2wr(), but
# c2 gets cleared anyway by gc.
# We want to let gc happen "naturally", to preserve the distinction
# between generations.
junk = []
i = 0
detector = GC_Detector()
while not detector.gc_happened:
i += 1
if i > 10000:
raise TestFailed("gc didn't happen after 10000 iterations")
expect(len(ouch), 0, "bug1055820c")
junk.append([]) # this will eventually trigger gc
expect(len(ouch), 1, "bug1055820c") # else the callback wasn't invoked
for x in ouch:
# If the callback resurrected c2, the instance would be damaged,
# with an empty __dict__.
expect(x, None, "bug1055820c")
def test_bug1055820d():
# Corresponds to temp2d.py in the bug report. This is very much like
# test_bug1055820c, but uses a __del__ method instead of a weakref
# callback to sneak in a resurrection of cyclic trash.
ouch = []
class D(C1055820):
def __del__(self):
ouch[:] = [c2wr()]
d0 = D(0)
# Move all the above into generation 2.
gc.collect()
c1 = C1055820(1)
c1.keep_d0_alive = d0
del d0.loop # now only c1 keeps d0 alive
c2 = C1055820(2)
c2wr = weakref.ref(c2) # no callback!
d0 = c1 = c2 = None
# What we've set up: d0, c1, and c2 are all trash now. d0 is in
# generation 2. The only thing keeping it alive is that c1 points to it.
# c1 and c2 are in generation 0, and are in self-loops. There's a global
# weakref to c2 (c2wr), but that weakref has no callback. There are no
# other weakrefs.
#
# d0 has a __del__ method that references c2wr
# ^
# |
# | Generation 2 above dots
#. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . .
# | Generation 0 below dots
# |
# |
# ^->c1 ^->c2 has a wr but no callback
# | | | |
# <--v <--v
#
# So this is the nightmare: when generation 0 gets collected, we see that
# c2 has a callback-free weakref, and c1 doesn't even have a weakref.
# Collecting generation 0 doesn't see d0 at all. gc clears c1 and c2.
# Clearing c1 has the side effect of dropping the refcount on d0 to 0, so
# d0 goes away (despite that it's in an older generation) and d0's __del__
# triggers. That in turn materializes a reference to c2 via c2wr(), but
# c2 gets cleared anyway by gc.
# We want to let gc happen "naturally", to preserve the distinction
# between generations.
detector = GC_Detector()
junk = []
i = 0
while not detector.gc_happened:
i += 1
if i > 10000:
raise TestFailed("gc didn't happen after 10000 iterations")
expect(len(ouch), 0, "bug1055820d")
junk.append([]) # this will eventually trigger gc
expect(len(ouch), 1, "bug1055820d") # else __del__ wasn't invoked
for x in ouch:
# If __del__ resurrected c2, the instance would be damaged, with an
# empty __dict__.
expect(x, None, "bug1055820d")
def test_all(): def test_all():
gc.collect() # Delete 2nd generation garbage gc.collect() # Delete 2nd generation garbage
run_test("lists", test_list) run_test("lists", test_list)
...@@ -392,6 +578,19 @@ def test_all(): ...@@ -392,6 +578,19 @@ def test_all():
run_test("boom_new", test_boom_new) run_test("boom_new", test_boom_new)
run_test("boom2_new", test_boom2_new) run_test("boom2_new", test_boom2_new)
run_test("get_referents", test_get_referents) run_test("get_referents", test_get_referents)
run_test("bug1055820b", test_bug1055820b)
gc.enable()
try:
run_test("bug1055820c", test_bug1055820c)
finally:
gc.disable()
gc.enable()
try:
run_test("bug1055820d", test_bug1055820d)
finally:
gc.disable()
def test(): def test():
if verbose: if verbose:
......
...@@ -32,6 +32,17 @@ License Version 2. ...@@ -32,6 +32,17 @@ License Version 2.
Core and builtins Core and builtins
----------------- -----------------
- Bug #1055820 Cyclic garbage collection was not protecting against that
calling a live weakref to a piece of cyclic trash could resurrect an
insane mutation of the trash if any Python code ran during gc (via
running a dead object's __del__ method, running another callback on a
weakref to a dead object, or via any Python code run in any other thread
that managed to obtain the GIL while a __del__ or callback was running
in the thread doing gc). The most likely symptom was "impossible"
``AttributeEror`` exceptions, appearing seemingly at random, on weakly
referenced objects. The cure was to clear all weakrefs to unreachable
objects before allowing any callbacks to run.
- Bug #1054139 _PyString_Resize() now invalidates its cached hash value. - Bug #1054139 _PyString_Resize() now invalidates its cached hash value.
Extension Modules Extension Modules
......
Intro
=====
The basic rule for dealing with weakref callbacks (and __del__ methods too,
for that matter) during cyclic gc:
Once gc has computed the set of unreachable objects, no Python-level
code can be allowed to access an unreachable object.
If that can happen, then the Python code can resurrect unreachable objects
too, and gc can't detect that without starting over. Since gc eventually
runs tp_clear on all unreachable objects, if an unreachable object is
resurrected then tp_clear will eventually be called on it (or may already
have been called before resurrection). At best (and this has been an
historically common bug), tp_clear empties an instance's __dict__, and
"impossible" AttributeErrors result. At worst, tp_clear leaves behind an
insane object at the C level, and segfaults result (historically, most
often by setting a new-style class's mro pointer to NULL, after which
attribute lookups performed by the class can segfault).
OTOH, it's OK to run Python-level code that can't access unreachable
objects, and sometimes that's necessary. The chief example is the callback
attached to a reachable weakref W to an unreachable object O. Since O is
going away, and W is still alive, the callback must be invoked. Because W
is still alive, everything reachable from its callback is also reachable,
so it's also safe to invoke the callback (although that's trickier than it
sounds, since other reachable weakrefs to other unreachable objects may
still exist, and be accessible to the callback -- there are lots of painful
details like this covered in the rest of this file).
Python 2.4/2.3.5
================
The "Before 2.3.3" section below turned out to be wrong in some ways, but
I'm leaving it as-is because it's more right than wrong, and serves as a
wonderful example of how painful analysis can miss not only the forest for
the trees, but also miss the trees for the aphids sucking the trees
dry <wink>.
The primary thing it missed is that when a weakref to a piece of cyclic
trash (CT) exists, then any call to any Python code whatsoever can end up
materializing a strong reference to that weakref's CT referent, and so
possibly resurrect an insane object (one for which cyclic gc has called-- or
will call before it's done --tp_clear()). It's not even necessarily that a
weakref callback or __del__ method does something nasty on purpose: as
soon as we execute Python code, threads other than the gc thread can run
too, and they can do ordinary things with weakrefs that end up resurrecting
CT while gc is running.
http://www.python.org/sf/1055820
shows how innocent it can be, and also how nasty. Variants of the three
focussed test cases attached to that bug report are now part of Python's
standard Lib/test/test_gc.py.
Jim Fulton gave the best nutshell summary of the new (in 2.4 and 2.3.5)
approach:
Clearing cyclic trash can call Python code. If there are weakrefs to
any of the cyclic trash, then those weakrefs can be used to resurrect
the objects. Therefore, *before* clearing cyclic trash, we need to
remove any weakrefs. If any of the weakrefs being removed have
callbacks, then we need to save the callbacks and call them *after* all
of the weakrefs have been cleared.
Alas, doing just that much doesn't work, because it overlooks what turned
out to be the much subtler problems that were fixed earlier, and described
below. We do clear all weakrefs to CT now before breaking cycles, but not
all callbacks encountered can be run later. That's explained in horrid
detail below.
Older text follows, with a some later comments in [] brackets:
Before 2.3.3
============
Before 2.3.3, Python's cyclic gc didn't pay any attention to weakrefs. Before 2.3.3, Python's cyclic gc didn't pay any attention to weakrefs.
Segfaults in Zope3 resulted. Segfaults in Zope3 resulted.
...@@ -19,12 +95,19 @@ segfaults) can happen right then, during the callback's execution, or can ...@@ -19,12 +95,19 @@ segfaults) can happen right then, during the callback's execution, or can
happen at any later time if the callback manages to resurrect an insane happen at any later time if the callback manages to resurrect an insane
object. object.
[That missed that, in addition, a weakref to CT can exist outside CT, and
any callback into Python can use such a non-CT weakref to resurrect its CT
referent. The same bad kinds of things can happen then.]
Note that if it's possible for the callback to get at objects in the trash Note that if it's possible for the callback to get at objects in the trash
cycles, it must also be the case that the callback itself is part of the cycles, it must also be the case that the callback itself is part of the
trash cycles. Else the callback would have acted as an external root to trash cycles. Else the callback would have acted as an external root to
the current collection, and nothing reachable from it would be in cyclic the current collection, and nothing reachable from it would be in cyclic
trash either. trash either.
[Except that a non-CT callback can also use a non-CT weakref to get at
CT objects.]
More, if the callback itself is in cyclic trash, then the weakref to which More, if the callback itself is in cyclic trash, then the weakref to which
the callback is attached must also be trash, and for the same kind of the callback is attached must also be trash, and for the same kind of
reason: if the weakref acted as an external root, then the callback could reason: if the weakref acted as an external root, then the callback could
...@@ -47,6 +130,13 @@ cyclic trash, it's defensible to first clear weakrefs with callbacks. It's ...@@ -47,6 +130,13 @@ cyclic trash, it's defensible to first clear weakrefs with callbacks. It's
a feature of Python's weakrefs too that when a weakref goes away, the a feature of Python's weakrefs too that when a weakref goes away, the
callback (if any) associated with it is thrown away too, unexecuted. callback (if any) associated with it is thrown away too, unexecuted.
[In 2.4/2.3.5, we first clear all weakrefs to CT objects, whether or not
those weakrefs are themselves CT, and whether or not they have callbacks.
The callbacks (if any) on non-CT weakrefs (if any) are invoked later,
after all weakrefs-to-CT have been cleared. The callbacks (if any) on CT
weakrefs (if any) are never invoked, for the excruciating reasons
explained here.]
Just that much is almost enough to prevent problems, by throwing away Just that much is almost enough to prevent problems, by throwing away
*almost* all the weakref callbacks that could get triggered by gc. The *almost* all the weakref callbacks that could get triggered by gc. The
problem remaining is that clearing a weakref with a callback decrefs the problem remaining is that clearing a weakref with a callback decrefs the
...@@ -56,7 +146,15 @@ weakrefs can trigger callbacks attached to other weakrefs, and those ...@@ -56,7 +146,15 @@ weakrefs can trigger callbacks attached to other weakrefs, and those
latter weakrefs may or may not be part of cyclic trash. latter weakrefs may or may not be part of cyclic trash.
So, to prevent any Python code from running while gc is invoking tp_clear() So, to prevent any Python code from running while gc is invoking tp_clear()
on all the objects in cyclic trash, it's not quite enough just to invoke on all the objects in cyclic trash,
[That was always wrong: we can't stop Python code from running when gc
is breaking cycles. If an object with a __del__ method is not itself in
a cycle, but is reachable only from CT, then breaking cycles will, as a
matter of course, drop the refcount on that object to 0, and its __del__
will run right then. What we can and must stop is running any Python
code that could access CT.]
it's not quite enough just to invoke
tp_clear() on weakrefs with callbacks first. Instead the weakref module tp_clear() on weakrefs with callbacks first. Instead the weakref module
grew a new private function (_PyWeakref_ClearRef) that does only part of grew a new private function (_PyWeakref_ClearRef) that does only part of
tp_clear(): it removes the weakref from the weakly-referenced object's list tp_clear(): it removes the weakref from the weakly-referenced object's list
...@@ -65,9 +163,13 @@ _PyWeakref_ClearRef(wr) ensures that wr's callback object will never ...@@ -65,9 +163,13 @@ _PyWeakref_ClearRef(wr) ensures that wr's callback object will never
trigger, and (unlike weakref's tp_clear()) also prevents any callback trigger, and (unlike weakref's tp_clear()) also prevents any callback
associated *with* wr's callback object from triggering. associated *with* wr's callback object from triggering.
[Although we may trigger such callbacks later, as explained below.]
Then we can call tp_clear on all the cyclic objects and never trigger Then we can call tp_clear on all the cyclic objects and never trigger
Python code. Python code.
[As above, not so: it means never trigger Python code that can access CT.]
After we do that, the callback objects still need to be decref'ed. Callbacks After we do that, the callback objects still need to be decref'ed. Callbacks
(if any) *on* the callback objects that were also part of cyclic trash won't (if any) *on* the callback objects that were also part of cyclic trash won't
get invoked, because we cleared all trash weakrefs with callbacks at the get invoked, because we cleared all trash weakrefs with callbacks at the
...@@ -76,6 +178,10 @@ acted as external roots to everything reachable from them, so nothing ...@@ -76,6 +178,10 @@ acted as external roots to everything reachable from them, so nothing
reachable from them was part of cyclic trash, so gc didn't do any damage to reachable from them was part of cyclic trash, so gc didn't do any damage to
objects reachable from them, and it's safe to call them at the end of gc. objects reachable from them, and it's safe to call them at the end of gc.
[That's so. In addition, now we also invoke (if any) the callbacks on
non-CT weakrefs to CT objects, during the same pass that decrefs the
callback objects.]
An alternative would have been to treat objects with callbacks like objects An alternative would have been to treat objects with callbacks like objects
with __del__ methods, refusing to collect them, appending them to gc.garbage with __del__ methods, refusing to collect them, appending them to gc.garbage
instead. That would have been much easier. Jim Fulton gave a strong instead. That would have been much easier. Jim Fulton gave a strong
...@@ -105,3 +211,9 @@ __del__ methods is to avoid running finalizers in an arbitrary order). ...@@ -105,3 +211,9 @@ __del__ methods is to avoid running finalizers in an arbitrary order).
However, a weakref callback on a weakref callback has got to be rare. However, a weakref callback on a weakref callback has got to be rare.
It's possible to do such a thing, so gc has to be robust against it, but It's possible to do such a thing, so gc has to be robust against it, but
I doubt anyone has done it outside the test case I wrote for it. I doubt anyone has done it outside the test case I wrote for it.
[The callbacks (if any) on non-CT weakrefs to CT objects are also executed
in an arbitrary order now. But they were before too, depending on the
vagaries of when tp_clear() happened to break enough cycles to trigger
them. People simply shouldn't try to use __del__ or weakref callbacks to
do fancy stuff.]
This diff is collapsed.
...@@ -850,7 +850,9 @@ PyWeakref_GetObject(PyObject *ref) ...@@ -850,7 +850,9 @@ PyWeakref_GetObject(PyObject *ref)
return PyWeakref_GET_OBJECT(ref); return PyWeakref_GET_OBJECT(ref);
} }
/* Note that there's an inlined copy-paste of handle_callback() in gcmodule.c's
* handle_weakrefs().
*/
static void static void
handle_callback(PyWeakReference *ref, PyObject *callback) handle_callback(PyWeakReference *ref, PyObject *callback)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment