Commit f58caf8c authored by Kevin Modzelewski

Unicode fix: the unicode file caches some objects statically that we have to gc-track

I don't like making extension modules call a function (here called PyGC_AddRoot),
but I think this is something that we can eventually automate (look for stores
to static locations) so I don't mind going with a temporary solution.
parent a8f18433
......@@ -105,6 +105,11 @@ extern "C" {
PyObject* PyModule_GetDict(PyObject*) PYSTON_NOEXCEPT;
// Pyston addition:
// Our goal is to not make extension modules declare their static memory. But until we can identify
// that in an automated way, we have to modify extension modules to call this:
void PyGC_AddRoot(PyObject*) PYSTON_NOEXCEPT;
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name, str) PyDoc_VAR(name) = PyDoc_STR(str)
#define PyDoc_STR(str) str
......
......@@ -53,7 +53,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* Limit for the Unicode object free list */
#define PyUnicode_MAXFREELIST 1024
// Pyston change: set this to 0 (was 1024) to disable the free list since we can't track that through our GC.
#define PyUnicode_MAXFREELIST 0
/* Limit for the Unicode object free list stay alive optimization.
......@@ -108,8 +109,10 @@ static PyUnicodeObject *unicode_empty = NULL;
Py_INCREF(unicode_empty); \
else { \
unicode_empty = _PyUnicode_New(0); \
if (unicode_empty != NULL) \
if (unicode_empty != NULL) { \
Py_INCREF(unicode_empty); \
PyGC_AddRoot((PyObject*)unicode_empty); \
} \
} \
return (PyObject *)unicode_empty; \
} while (0)
......@@ -474,6 +477,7 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
unicode = _PyUnicode_New(1);
if (!unicode)
return NULL;
PyGC_AddRoot((PyObject*)unicode);
unicode->str[0] = *u;
unicode_latin1[*u] = unicode;
}
......@@ -521,6 +525,7 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
unicode = _PyUnicode_New(1);
if (!unicode)
return NULL;
PyGC_AddRoot((PyObject*)unicode);
unicode->str[0] = Py_CHARMASK(*u);
unicode_latin1[Py_CHARMASK(*u)] = unicode;
}
......@@ -8927,6 +8932,7 @@ void _PyUnicode_Init(void)
unicode_empty = _PyUnicode_New(0);
if (!unicode_empty)
return;
PyGC_AddRoot((PyObject*)unicode_empty);
}
/* initialize the linebreak bloom filter */
......
......@@ -525,6 +525,30 @@ extern "C" Py_ssize_t PySequence_Size(PyObject* s) noexcept {
return -1;
}
// Produce a list-or-tuple view of `v`.
//
// - A NULL `v` yields whatever null_error() returns.
// - An exact list or exact tuple is handed back as-is, after Py_INCREF.
// - Anything else is iterated via PyObject_GetIter() and the iterator is passed
//   to PySequence_List(); that result (possibly NULL) is returned.
//
// `m` is the message installed on a TypeError when `v` cannot be iterated and the
// pending exception matches PyExc_TypeError; other pending exceptions are left alone.
extern "C" PyObject* PySequence_Fast(PyObject* v, const char* m) noexcept {
    if (!v)
        return null_error();

    // Fast path: the argument is already exactly a list or a tuple.
    const bool already_fast = PyList_CheckExact(v) || PyTuple_CheckExact(v);
    if (already_fast) {
        Py_INCREF(v);
        return v;
    }

    PyObject* iterator = PyObject_GetIter(v);
    if (!iterator) {
        // Swap in the caller's message only for the TypeError case.
        if (PyErr_ExceptionMatches(PyExc_TypeError))
            PyErr_SetString(PyExc_TypeError, m);
        return NULL;
    }

    PyObject* as_list = PySequence_List(iterator);
    Py_DECREF(iterator);
    return as_list;
}
static PyObject* binary_op1(PyObject* v, PyObject* w, const int op_slot) {
PyObject* x;
binaryfunc slotv = NULL;
......
......@@ -131,6 +131,10 @@ void registerPermanentRoot(void* obj) {
#endif
}
// Pyston addition to the C API: forwards `obj` to registerPermanentRoot().
extern "C" void PyGC_AddRoot(PyObject* obj) noexcept {
    void* root = obj;
    registerPermanentRoot(root);
}
static std::unordered_set<void*> nonheap_roots;
// Track the highest-addressed nonheap root; the assumption is that the nonheap roots will
// typically all have lower addresses than the heap roots, so this can serve as a cheap
......
......@@ -509,10 +509,6 @@ extern "C" PyObject* PySequence_Tuple(PyObject* o) noexcept {
Py_FatalError("unimplemented");
}
// Placeholder for PySequence_Fast: neither `o` nor `m` is used; every call goes
// straight to Py_FatalError("unimplemented").
extern "C" PyObject* PySequence_Fast(PyObject* o, const char* m) noexcept {
Py_FatalError("unimplemented");
}
extern "C" PyObject* PyIter_Next(PyObject* iter) noexcept {
static const std::string next_str("next");
try {
......
......@@ -41,3 +41,17 @@ print p(s.encode("utf8"))
print p(s.encode("utf16"))
print p(s.encode("utf32"))
print p(s.encode("iso_8859_15"))
print repr(u' '.join(["hello", "world"]))
# GC test: the unicode module interns certain unicode strings (the empty string among them).
# Make sure we don't end up GCing it.
# Call BaseException().__unicode__() since that happens to be one of the ways to access
# the interned empty string ("unicode_empty")
import gc
for i in xrange(100):
print repr(BaseException().__unicode__())
gc.collect()
# do some allocations:
for j in xrange(100):
[None] * j
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment