Fix "%f" format of str%args if the result is not an ASCII or latin1 string

184252ad · Victor Stinner · 9a77770a · 184252ad · 184252ad
Commit 184252ad authored Jun 16, 2012 by Victor Stinner
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 17 deletions

Lib/test/test_format.py Lib/test/test_format.py +2 -0

Objects/unicodeobject.c Objects/unicodeobject.c +19 -17

No files found.
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -265,6 +265,8 @@ class FormatTest(unittest.TestCase):
                raise TestFailed('"%*d"%(maxsize, -127) should fail')

    def test_non_ascii(self):
+        testformat("\u20ac=%f", (1.0,), "\u20ac=1.000000")
+
        self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007")
        self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007")
        self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007")

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1660,34 +1660,34 @@ unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
 }

 /* Copy a ASCII or latin1 char* string into a Python Unicode string.
-   Return the length of the input string.

   WARNING: The function doesn't copy the terminating null character and
   doesn't check the maximum character (may write a latin1 character in an
   ASCII string). */
-static Py_ssize_t
-unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str)
+static void
+unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
+                   const char *str, Py_ssize_t len)
 {
    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
    void *data = PyUnicode_DATA(unicode);
+    const char *end = str + len;

    switch (kind) {
    case PyUnicode_1BYTE_KIND: {
-        Py_ssize_t len = strlen(str);
        assert(index + len <= PyUnicode_GET_LENGTH(unicode));
        memcpy((char *) data + index, str, len);
-        return len;
+        break;
    }
    case PyUnicode_2BYTE_KIND: {
        Py_UCS2 *start = (Py_UCS2 *)data + index;
        Py_UCS2 *ucs2 = start;
        assert(index <= PyUnicode_GET_LENGTH(unicode));

-        for (; *str; ++ucs2, ++str)
+        for (; str < end; ++ucs2, ++str)
            *ucs2 = (Py_UCS2)*str;

        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
-        return ucs2 - start;
+        break;
    }
    default: {
        Py_UCS4 *start = (Py_UCS4 *)data + index;
@@ -1695,11 +1695,10 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str)
        assert(kind == PyUnicode_4BYTE_KIND);
        assert(index <= PyUnicode_GET_LENGTH(unicode));

-        for (; *str; ++ucs4, ++str)
+        for (; str < end; ++ucs4, ++str)
            *ucs4 = (Py_UCS4)*str;

        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
-        return ucs4 - start;
    }
    }
 }
@@ -2730,17 +2729,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
            case 'x':
            case 'p':
            {
-                Py_ssize_t written;
+                Py_ssize_t len;
                /* unused, since we already have the result */
                if (*f == 'p')
                    (void) va_arg(vargs, void *);
                else
                    (void) va_arg(vargs, int);
                /* extract the result from numberresults and append. */
-                written = unicode_write_cstr(string, i, numberresult);
+                len = strlen(numberresult);
+                unicode_write_cstr(string, i, numberresult, len);
                /* skip over the separating '\0' */
-                i += written;
-                numberresult += written;
+                i += len;
+                numberresult += len;
                assert(*numberresult == '\0');
                numberresult++;
                assert(numberresult <= numberresults + numbersize);
@@ -2812,10 +2812,14 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                PyUnicode_WRITE(kind, data, i++, '%');
                break;
            default:
-                i += unicode_write_cstr(string, i, p);
+            {
+                Py_ssize_t len = strlen(p);
+                unicode_write_cstr(string, i, p, len);
+                i += len;
                assert(i == PyUnicode_GET_LENGTH(string));
                goto end;
            }
+            }
        }
        else {
            assert(i < PyUnicode_GET_LENGTH(string));
@@ -13211,9 +13215,7 @@ formatfloat(PyObject *v, int flags, int prec, int type,
    if (writer) {
        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
            return -1;
-        memcpy((char*)writer->data + writer->pos * writer->kind,
-               p,
-               len);
+        unicode_write_cstr(writer->buffer, writer->pos, p, len);
        writer->pos += len;
    }
    else