Unicode fix: the unicode file caches some objects statically that we have to gc-track

I don't like making extension modules call a function (here called PyGC_AddRoot), but I think this is something that we can eventually automate (look for stores to static locations) so I don't mind going with a temporary solution.

Unicode fix: the unicode file caches some objects statically that we have to gc-track
I don't like making extension modules call a function (here called PyGC_AddRoot), but I think this is something that we can eventually automate (look for stores to static locations) so I don't mind going with a temporary solution.
f58caf8c · Kevin Modzelewski · a8f18433 · f58caf8c · f58caf8c · f58caf8c
Commit f58caf8c authored Feb 26, 2015 by Kevin Modzelewski
6 changed files
--- a/from_cpython/Include/Python.h
+++ b/from_cpython/Include/Python.h
@@ -105,6 +105,11 @@ extern "C" {

 PyObject* PyModule_GetDict(PyObject*) PYSTON_NOEXCEPT;

+// Pyston addition:
+// Our goal is to not make extension modules declare their static memory.  But until we can identify
+// that in an automated way, we have to modify extension modules to call this:
+void PyGC_AddRoot(PyObject*) PYSTON_NOEXCEPT;
+
 #define PyDoc_VAR(name) static char name[]
 #define PyDoc_STRVAR(name, str) PyDoc_VAR(name) = PyDoc_STR(str)
 #define PyDoc_STR(str) str

--- a/from_cpython/Objects/unicodeobject.c
+++ b/from_cpython/Objects/unicodeobject.c
@@ -53,7 +53,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

 /* Limit for the Unicode object free list */

-#define PyUnicode_MAXFREELIST       1024
+// Pyston change: set this to 0 (was 1024) to disable the free list since we can't track that through our GC.
+#define PyUnicode_MAXFREELIST       0

 /* Limit for the Unicode object free list stay alive optimization.

@@ -108,8 +109,10 @@ static PyUnicodeObject *unicode_empty = NULL;
            Py_INCREF(unicode_empty);                   \
        else {                                          \
            unicode_empty = _PyUnicode_New(0);          \
-            if (unicode_empty != NULL)                  \
+            if (unicode_empty != NULL) {                \
                Py_INCREF(unicode_empty);               \
+                PyGC_AddRoot((PyObject*)unicode_empty); \
+            }                                           \
        }                                               \
        return (PyObject *)unicode_empty;               \
    } while (0)
@@ -474,6 +477,7 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
                unicode = _PyUnicode_New(1);
                if (!unicode)
                    return NULL;
+                PyGC_AddRoot((PyObject*)unicode);
                unicode->str[0] = *u;
                unicode_latin1[*u] = unicode;
            }
@@ -521,6 +525,7 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
                unicode = _PyUnicode_New(1);
                if (!unicode)
                    return NULL;
+                PyGC_AddRoot((PyObject*)unicode);
                unicode->str[0] = Py_CHARMASK(*u);
                unicode_latin1[Py_CHARMASK(*u)] = unicode;
            }
@@ -8927,6 +8932,7 @@ void _PyUnicode_Init(void)
        unicode_empty = _PyUnicode_New(0);
        if (!unicode_empty)
            return;
+        PyGC_AddRoot((PyObject*)unicode_empty);
    }

    /* initialize the linebreak bloom filter */

--- a/src/capi/abstract.cpp
+++ b/src/capi/abstract.cpp
@@ -525,6 +525,30 @@ extern "C" Py_ssize_t PySequence_Size(PyObject* s) noexcept {
    return -1;
 }

+extern "C" PyObject* PySequence_Fast(PyObject* v, const char* m) noexcept {  // v: object to convert; m: TypeError text used if v is not iterable
+    PyObject* it;
+
+    if (v == NULL)
+        return null_error();
+
+    if (PyList_CheckExact(v) || PyTuple_CheckExact(v)) {  // exact list/tuple: return v itself with one more reference
+        Py_INCREF(v);
+        return v;
+    }
+
+    it = PyObject_GetIter(v);
+    if (it == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_TypeError))  // only a TypeError is replaced with the caller's message m
+            PyErr_SetString(PyExc_TypeError, m);
+        return NULL;
+    }
+
+    v = PySequence_List(it);  // any other iterable: build the result from its iterator (v now holds the result, not the argument)
+    Py_DECREF(it);  // the iterator is released whether or not PySequence_List succeeded
+
+    return v;
+}
+
 static PyObject* binary_op1(PyObject* v, PyObject* w, const int op_slot) {
    PyObject* x;
    binaryfunc slotv = NULL;

--- a/src/gc/collector.cpp
+++ b/src/gc/collector.cpp
@@ -131,6 +131,10 @@ void registerPermanentRoot(void* obj) {
 #endif
 }

+extern "C" void PyGC_AddRoot(PyObject* obj) noexcept {  // C-linkage wrapper that forwards obj to registerPermanentRoot()
+    registerPermanentRoot(obj);
+}
+
 static std::unordered_set<void*> nonheap_roots;
 // Track the highest-addressed nonheap root; the assumption is that the nonheap roots will
 // typically all have lower addresses than the heap roots, so this can serve as a cheap

--- a/src/runtime/capi.cpp
+++ b/src/runtime/capi.cpp
@@ -509,10 +509,6 @@ extern "C" PyObject* PySequence_Tuple(PyObject* o) noexcept {
    Py_FatalError("unimplemented");
 }

-extern "C" PyObject* PySequence_Fast(PyObject* o, const char* m) noexcept {
-    Py_FatalError("unimplemented");
-}
-
 extern "C" PyObject* PyIter_Next(PyObject* iter) noexcept {
    static const std::string next_str("next");
    try {

--- a/test/tests/unicode_test.py
+++ b/test/tests/unicode_test.py
@@ -41,3 +41,17 @@ print p(s.encode("utf8"))
 print p(s.encode("utf16"))
 print p(s.encode("utf32"))
 print p(s.encode("iso_8859_15"))
+
+print repr(u' '.join(["hello", "world"]))  # unicode separator joining plain str items
+
+# GC test: the unicode module interns certain unicode strings (the empty string among them).
+# Make sure we don't end up GCing it.
+# Call BaseException().__unicode__() since that happens to be one of the ways to access
+# the interned empty string ("unicode_empty")
+import gc
+for i in xrange(100):
+    print repr(BaseException().__unicode__())
+    gc.collect()  # explicit collection on every iteration, between uses of the cached string
+    # do some allocations:
+    for j in xrange(100):
+        [None] * j  # throwaway list of length j; the result is intentionally discarded