Merge pull request #673 from kmod/perf

Unicode-creation optimizations

Merge pull request #673 from kmod/perf
Unicode-creation optimizations
bbc95869 · Kevin Modzelewski · a60a1b9a · 4ab66c61 · bbc95869 · bbc95869
Commit bbc95869 authored Jul 06, 2015 by Kevin Modzelewski
7 changed files
--- a/from_cpython/Include/stringobject.h
+++ b/from_cpython/Include/stringobject.h
@@ -112,10 +112,8 @@ PyAPI_FUNC(int) _PyString_CheckInterned(PyObject *) PYSTON_NOEXCEPT;
 // length of the unicode string, not the length of the bytes it encodes to in the default
 // encoding.
 // So, set up a different function for those callers to use.
-PyAPI_FUNC(Py_ssize_t) _PyString_SizeMacro(PyObject *) PYSTON_NOEXCEPT;
-#define PyString_GET_SIZE(op)  _PyString_SizeMacro((PyObject*)op)
 //#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
-//#define PyString_GET_SIZE(op)  Py_SIZE(op)
+#define PyString_GET_SIZE(op)  Py_SIZE(op)

 /* _PyString_Join(sep, x) is like sep.join(x).  sep must be PyStringObject*,
   x must be an iterable object. */

--- a/from_cpython/Objects/unicodeobject.c
+++ b/from_cpython/Objects/unicodeobject.c
@@ -101,7 +101,7 @@ static PyUnicodeObject *free_list = NULL;
 static int numfree = 0;

 /* The empty Unicode object is shared to improve performance. */
-static PyUnicodeObject *unicode_empty = NULL;
+PyUnicodeObject *unicode_empty = NULL;

 #define _Py_RETURN_UNICODE_EMPTY()                      \
    do {                                                \
@@ -317,76 +317,7 @@ int unicode_resize(register PyUnicodeObject *unicode,

 */

-static
-PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
-{
-    register PyUnicodeObject *unicode;
-
-    /* Optimization for empty strings */
-    if (length == 0 && unicode_empty != NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
-
-    /* Ensure we won't overflow the size. */
-    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
-        return (PyUnicodeObject *)PyErr_NoMemory();
-    }
-
-    /* Unicode freelist & memory allocation */
-    if (free_list) {
-        unicode = free_list;
-        free_list = *(PyUnicodeObject **)unicode;
-        numfree--;
-        if (unicode->str) {
-            /* Keep-Alive optimization: we only upsize the buffer,
-               never downsize it. */
-            if ((unicode->length < length) &&
-                unicode_resize(unicode, length) < 0) {
-                PyObject_DEL(unicode->str);
-                unicode->str = NULL;
-            }
-        }
-        else {
-            size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
-            unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
-        }
-        PyObject_INIT(unicode, &PyUnicode_Type);
-    }
-    else {
-        size_t new_size;
-        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
-        if (unicode == NULL)
-            return NULL;
-        new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
-        unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
-    }
-
-    if (!unicode->str) {
-        PyErr_NoMemory();
-        goto onError;
-    }
-    /* Initialize the first element to guard against cases where
-     * the caller fails before initializing str -- unicode_resize()
-     * reads str[0], and the Keep-Alive optimization can keep memory
-     * allocated for str alive across a call to unicode_dealloc(unicode).
-     * We don't want unicode_resize to read uninitialized memory in
-     * that case.
-     */
-    unicode->str[0] = 0;
-    unicode->str[length] = 0;
-    unicode->length = length;
-    unicode->hash = -1;
-    unicode->defenc = NULL;
-    return unicode;
-
-  onError:
-    /* XXX UNREF/NEWREF interface should be more symmetrical */
-    _Py_DEC_REFTOTAL;
-    _Py_ForgetReference((PyObject *)unicode);
-    PyObject_Del(unicode);
-    return NULL;
-}
+extern PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);

 static
 void unicode_dealloc(register PyUnicodeObject *unicode)

--- a/microbenchmarks/re_finditer_bench.py
+++ b/microbenchmarks/re_finditer_bench.py
+import re
+def f():  # Microbenchmark: call finditer() 2,000,000 times on a precompiled single-space pattern.
+    r = re.compile(" ")
+    u = "a b c d"
+    for i in xrange(2000000):  # xrange is Python 2 only
+        r.finditer(u)  # the returned iterator is discarded without being consumed
+f()
--- a/microbenchmarks/unicode_split_ubench.py
+++ b/microbenchmarks/unicode_split_ubench.py
+def f():  # Microbenchmark: split a short string on a unicode separator 4,000,000 times.
+    u = "a b c d"  # NOTE(review): plain (non-u"") literal; only the separator below is unicode -- confirm this is intended
+    u2 = u" "
+    for i in xrange(4000000):  # xrange is Python 2 only
+        u.split(u2)  # result is discarded; only the call itself is exercised
+f()
--- a/src/codegen/entry.cpp
+++ b/src/codegen/entry.cpp
@@ -364,6 +364,7 @@ static void handle_sigprof(int signum) {

 //#define INVESTIGATE_STAT_TIMER "us_timer_in_jitted_code"
 #ifdef INVESTIGATE_STAT_TIMER
+static_assert(STAT_TIMERS, "Stat timers need to be enabled to investigate them");
 static uint64_t* stat_counter = Stats::getStatCounter(INVESTIGATE_STAT_TIMER);
 static void handle_sigprof_investigate_stattimer(int signum) {
    if (StatTimer::getCurrentCounter() == stat_counter)

--- a/src/runtime/str.cpp
+++ b/src/runtime/str.cpp
@@ -2348,16 +2348,6 @@ extern "C" Py_ssize_t PyString_Size(PyObject* op) noexcept {
    return len;
 }

-extern "C" Py_ssize_t _PyString_SizeMacro(PyObject* op) noexcept {
-    if (PyString_Check(op))
-        return static_cast<BoxedString*>(op)->size();
-
-    if (PyUnicode_Check(op))
-        return Py_SIZE(op);
-
-    RELEASE_ASSERT(0, "Need to verify the behavior of PyString_GET_SIZE on %s objects", op->cls->tp_name);
-}
-
 extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
    // This is only allowed to be called when there is only one user of the string (ie a refcount of 1 in CPython)


--- a/src/runtime/types.cpp
+++ b/src/runtime/types.cpp
@@ -2850,6 +2850,66 @@ out:
    return result;
 }

+void unicode_visit(GCVisitor* v, Box* b) { // GC visit hook for unicode objects; installed as unicode_cls->gc_visit in setupRuntime().
+    boxGCHandler(v, b); // run the generic Box handler first
+
+    PyUnicodeObject* u = (PyUnicodeObject*)b;
+    v->visit(u->str); // the character buffer is a separate allocation (see _PyUnicode_New), so report it explicitly
+    v->visit(u->defenc); // NOTE(review): defenc starts out NULL in _PyUnicode_New -- confirm visit() accepts NULL
+}
+
+extern "C" PyUnicodeObject* unicode_empty;
+extern "C" PyUnicodeObject* _PyUnicode_New(Py_ssize_t length) noexcept { // Create a unicode object with room for `length` elements plus a terminator; failures return the result of PyErr_NoMemory().
+    PyUnicodeObject* unicode;
+
+    /* Optimization for empty strings */
+    if (length == 0 && unicode_empty != NULL) {
+        Py_INCREF(unicode_empty);
+        return unicode_empty; // shared empty object, handed out with the extra reference taken above
+    }
+
+    /* Ensure we won't overflow the size. */
+    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { // the "- 1" leaves room for the terminator slot (length + 1 elements are allocated below)
+        return (PyUnicodeObject*)PyErr_NoMemory();
+    }
+
+    // Pyston change: allocate ->str first, so that if this allocation
+    // causes a collection, we don't see a half-created unicode object:
+    size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+    Py_UNICODE* str = (Py_UNICODE*)gc_alloc(new_size, gc::GCKind::UNTRACKED); // NOTE(review): UNTRACKED presumably means the buffer is not scanned for pointers -- confirm against gc::GCKind
+    if (!str)
+        return (PyUnicodeObject*)PyErr_NoMemory();
+
+    // Do a bunch of inlining + constant folding of this line of CPython's:
+    // unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
+    assert(PyUnicode_Type.tp_basicsize == sizeof(PyUnicodeObject)); // use the compile-time constant
+    unicode = (PyUnicodeObject*)gc_alloc(sizeof(PyUnicodeObject), gc::GCKind::PYTHON);
+    if (unicode == NULL)
+        return (PyUnicodeObject*)PyErr_NoMemory(); // NOTE(review): `str` is not released on this path; presumably the collector reclaims it -- confirm
+
+    // Inline PyObject_INIT:
+    assert(!PyType_SUPPORTS_WEAKREFS(&PyUnicode_Type));
+    assert(!PyUnicode_Type.instancesHaveHCAttrs());
+    assert(!PyUnicode_Type.instancesHaveDictAttrs());
+    unicode->ob_type = (struct _typeobject*)&PyUnicode_Type; // the asserts above check that the type needs no weakref/attribute setup
+
+    unicode->str = str; // attach the buffer allocated before the object itself
+
+    /* Initialize the first element to guard against cases where
+     * the caller fails before initializing str -- unicode_resize()
+     * reads str[0], and the Keep-Alive optimization can keep memory
+     * allocated for str alive across a call to unicode_dealloc(unicode).
+     * We don't want unicode_resize to read uninitialized memory in
+     * that case.
+     */
+    unicode->str[0] = 0;
+    unicode->str[length] = 0; // terminator in the extra (length + 1)-th slot
+    unicode->length = length;
+    unicode->hash = -1; // presumably the "hash not yet computed" sentinel -- TODO confirm
+    unicode->defenc = NULL;
+    return unicode;
+}
+
 bool TRACK_ALLOCATIONS = false;
 void setupRuntime() {

@@ -3349,6 +3409,11 @@ void setupRuntime() {
    weakref_callableproxy->simple_destructor = proxy_to_tp_clear;
    weakref_callableproxy->is_pyston_class = true;

+    unicode_cls->tp_alloc = PystonType_GenericAlloc;
+    unicode_cls->gc_visit = unicode_visit;
+    unicode_cls->tp_dealloc = NULL;
+    unicode_cls->is_pyston_class = true;
+
    assert(object_cls->tp_setattro == PyObject_GenericSetAttr);
    assert(none_cls->tp_setattro == PyObject_GenericSetAttr);