_PyUnicode_Ready() cannot be used on ready strings anymore

* Change its prototype: PyObject* instead of PyUnicodeObject*. * Remove an old assertion; the result of PyUnicode_READY (_PyUnicode_Ready) must be checked instead.

_PyUnicode_Ready() cannot be used on ready strings anymore
* Change its prototype: PyObject* instead of PyUnicodeObject*. * Remove an old assertion; the result of PyUnicode_READY (_PyUnicode_Ready) must be checked instead.
d8f6510a · Victor Stinner · bc8b81bc · d8f6510a · d8f6510a
Commit d8f6510a authored Sep 29, 2011 by Victor Stinner
Hide whitespace changes
Inline Side-by-side

Showing with 24 additions and 30 deletions

Include/unicodeobject.h Include/unicodeobject.h +9 -7

Objects/unicodeobject.c Objects/unicodeobject.c +15 -23

No files found.
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -456,7 +456,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 #define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
-      0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
+      0 : _PyUnicode_Ready((PyObject *)(op))))

 /* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
@@ -497,14 +497,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
    );
 #endif

-/* Initializes the canonical string representation from a the deprected
-   wstr/Py_UNICODE representation.  This function is used to convert
-   unicode objects which were created using the old API to the new flexible
-   format introduced with PEP 393.  The PyUnicode_READY() macro can be
-   more efficient if the string is already ready. */
+/* Initializes the canonical string representation from a the deprecated
+   wstr/Py_UNICODE representation. This function is used to convert Unicode
+   objects which were created using the old API to the new flexible format
+   introduced with PEP 393.
+
+   Don't call this function directly, use the public PyUnicode_READY() macro
+   instead. */
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(int) _PyUnicode_Ready(
-    PyUnicodeObject *unicode    /* Unicode object */
+    PyObject *unicode           /* Unicode object */
    );
 #endif


--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -781,8 +781,9 @@ int unicode_ready_calls = 0;
 #endif

 int
-_PyUnicode_Ready(PyUnicodeObject *unicode)
+_PyUnicode_Ready(PyObject *obj)
 {
+    PyUnicodeObject *unicode = (PyUnicodeObject *)obj;
    wchar_t *end;
    Py_UCS4 maxchar = 0;
    Py_ssize_t num_surrogates;
@@ -790,25 +791,19 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)
    Py_ssize_t length_wo_surrogates;
 #endif

-    assert(PyUnicode_Check(unicode));
-
-    if (unicode->data.any != NULL) {
-        assert(PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
-        return 0;
-    }
-
    /* _PyUnicode_Ready() is only intented for old-style API usage where
-     * strings were created using _PyObject_New() and where no canonical
-     * representation (the str field) has been set yet aka strings
-     * which are not yet ready.
-     */
+       strings were created using _PyObject_New() and where no canonical
+       representation (the str field) has been set yet aka strings
+       which are not yet ready. */
+    assert(PyUnicode_Check(obj));
+    assert(!PyUnicode_IS_READY(obj));
+    assert(!PyUnicode_IS_COMPACT(obj));
+    assert(_PyUnicode_KIND(obj) == PyUnicode_WCHAR_KIND);
    assert(_PyUnicode_WSTR(unicode) != NULL);
-    assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
-    assert(!PyUnicode_IS_COMPACT(unicode));
-    assert(!PyUnicode_IS_READY(unicode));
-    /* Actually, it should neither be interned nor be anything else: */
-    assert(_PyUnicode_STATE(unicode).interned == 0);
+    assert(unicode->data.any == NULL);
    assert(unicode->_base.utf8 == NULL);
+    /* Actually, it should neither be interned nor be anything else: */
+    assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);

 #ifdef Py_DEBUG
    ++unicode_ready_calls;
@@ -816,11 +811,8 @@ _PyUnicode_Ready(PyUnicodeObject *unicode)

    end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
    if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
-                                                  &maxchar,
-                                                  &num_surrogates) == -1) {
-        assert(0 && "PyUnicode_FindMaxCharAndNumSurrogatePairs failed");
+                                &maxchar, &num_surrogates) == -1)
        return -1;
-    }

    if (maxchar < 256) {
        unicode->data.any = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
@@ -1046,8 +1038,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)

    /* If not empty and not single character, copy the Unicode data
       into the new object */
-    if (find_maxchar_surrogates(u, u + size, &maxchar,
-                                                  &num_surrogates) == -1)
+    if (find_maxchar_surrogates(u, u + size,
+                                &maxchar, &num_surrogates) == -1)
        return NULL;

    unicode = (PyUnicodeObject *) PyUnicode_New(size - num_surrogates,