Commit 61b18a40 by Kirill Smelkov

bigfile/py/loadblk: Replace pybuf with a stub object in calling frame in case it stays alive

It turns out some code wants to store tracebacks e.g. for further
logging/whatever. This way GC won't help to free up references to pybuf.
However if pybuf remain referenced only from calling frames, we can
change there reference to pybuf to a stub object "<pybuf>" and this way
remove the reference.

With added test but without loadblk changes the failure would be as:

    pybigfile_loadblk WARN: pybuf->ob_refcnt != 1 even after GC:
    pybuf (ob_refcnt=2):    <read-write buffer ptr 0x7fae4911f000, size 2097152 at 0x7fae4998cef0>
    pybuf referrers:        [<frame object at 0x556daff41aa0>]		<-- NOTE
    bigfile/_bigfile.c:613 pybigfile_loadblk        BUG!
1 parent f01b27d2
......@@ -28,7 +28,7 @@
#include <Python.h>
#include "structmember.h"
typedef struct _frame PyFrameObject;
#include "frameobject.h"
#include <wendelin/bigfile/file.h>
#include <wendelin/bigfile/virtmem.h>
......@@ -37,6 +37,7 @@ typedef struct _frame PyFrameObject;
#include <wendelin/compat_py2.h>
static PyObject *gcmodule;
static PyObject *pybuf_str;
/* whether to pass old buffer instead of memoryview to .loadblk() / .storeblk()
*
......@@ -126,6 +127,9 @@ static PyObject* /* PyListObject* */ XPyObject_GetReferrers(PyObject *obj);
/* print objects that refer to obj */
static void XPyObject_PrintReferrers(PyObject *obj, FILE *fp);
/* check whether frame f is a callee of top */
static int XPyFrame_IsCalleeOf(PyFrameObject *f, PyFrameObject *top);
/************
* PyVMA *
......@@ -551,8 +555,8 @@ out:
*
* and some of the frames continue to hold pybuf reference.
*
* Do full GC to collect such cycles this way removing references to
* pybuf.
* Do full GC to collect such, and possibly other, cycles this way
* removing references to pybuf.
*/
if (pybuf->ob_refcnt != 1) {
PyGC_Collect();
......@@ -565,6 +569,57 @@ out:
XPyErr_FullClear();
}
/* the story continues here - a traceback might be also explicitly
* saved by code somewhere this way not going away after GC. Let's
* find objects that refer to pybuf, and for frames called by loadblk()
* change pybuf to another stub object (we know there we can do it safely) */
if (pybuf->ob_refcnt != 1) {
PyObject *pybuf_users = XPyObject_GetReferrers(pybuf);
int i, j;
for (i = 0; i < PyList_GET_SIZE(pybuf_users); i++) {
PyObject *user = PyList_GET_ITEM(pybuf_users, i);
PyObject **fastlocals;
PyFrameObject *f;
/* if it was the frame used for our calling to py loadblk() we
* can replace pybuf to "<pybuf>" there in loadblk arguments */
if (PyFrame_Check(user)) {
f = (PyFrameObject *)user;
if (!XPyFrame_IsCalleeOf(f, ts->frame))
continue;
/* "fast" locals (->f_localsplus) */
fastlocals = f->f_localsplus;
for (j = f->f_code->co_nlocals; j >= 0; --j) {
if (fastlocals[j] == pybuf) {
Py_INCREF(pybuf_str);
fastlocals[j] = pybuf_str;
Py_DECREF(pybuf);
}
}
/* ->f_locals */
if (f->f_locals != NULL) {
TODO(!PyDict_CheckExact(f->f_locals));
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(f->f_locals, &pos, &key, &value)) {
if (value == pybuf) {
int err;
err = PyDict_SetItem(f->f_locals, key, pybuf_str);
BUG_ON(err == -1);
}
}
}
}
}
Py_DECREF(pybuf_users);
}
/* now it is real bug if pybuf remains referenced from somewhere */
if (pybuf->ob_refcnt != 1) {
WARN("pybuf->ob_refcnt != 1 even after GC:");
......@@ -916,6 +971,10 @@ _init_bigfile(void)
if (!gcmodule)
return NULL;
pybuf_str = PyUnicode_FromString("<pybuf>");
if (!pybuf_str)
return NULL;
return m;
}
......@@ -995,3 +1054,13 @@ XPyObject_PrintReferrers(PyObject *obj, FILE *fp)
PyObject_Print(obj_referrers, fp, 0);
Py_DECREF(obj_referrers);
}
static int
XPyFrame_IsCalleeOf(PyFrameObject *f, PyFrameObject *top)
{
for (; f; f = f->f_back)
if (f == top)
return 1;
return 0;
}
......@@ -126,6 +126,7 @@ def test_basic():
# test that python exception state is preserved across pagefaulting
def test_pagefault_savestate():
keep = []
class BadFile(BigFile):
def loadblk(self, blk, buf):
# simulate some errors in-between to overwrite thread exception
......@@ -150,9 +151,28 @@ def test_pagefault_savestate():
# will detect and handle this situation via garbage-collecting
# above cycle.
# and even if we keep traceback alive it will care to detach buf
# from frame via substituting another stub object inplace of it
exc_traceback.tb_frame.f_locals
keep.append(exc_traceback)
# check same when happenned in function one more level down
self.func(buf)
self.loadblk_run = 1
def func(self, arg):
try:
1/0
except:
_, _, exc_traceback = sys.exc_info()
assert exc_traceback is not None
keep.append(exc_traceback)
f = BadFile(PS)
fh = f.fileh_open()
vma = fh.mmap(0, 1)
......@@ -174,6 +194,9 @@ def test_pagefault_savestate():
assert exc_value is exc_value2
assert exc_tb is exc_tb2
assert keep[0].tb_frame.f_locals['buf'] == "<pybuf>" # the stub object
assert keep[1].tb_frame.f_locals['arg'] == "<pybuf>" # ----//----
# TODO close f
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!