Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken.

Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
cefbe249 · Serhiy Storchaka · a587f40e · cefbe249 · cefbe249 · cefbe249
Commit cefbe249 authored Jan 27, 2015 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 56 additions and 12 deletions

Lib/test/test_unicode.py +40 -0

Misc/NEWS +3 -0

Objects/unicodeobject.c +13 -12

No files found.
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1700,6 +1700,9 @@ class UnicodeTest(
        if sys.maxunicode > 0xffff:
            check_format(u'\U0010ffff',
                         b'%c', c_int(0x10ffff))
+        else:
+            with self.assertRaises(OverflowError):
+                PyUnicode_FromFormat(b'%c', c_int(0x10000))
        with self.assertRaises(OverflowError):
            PyUnicode_FromFormat(b'%c', c_int(0x110000))
        # Issue #18183
@@ -1750,8 +1753,45 @@ class UnicodeTest(
                     b'%zu', c_size_t(123))

        # test long output
+        min_long = -(2 ** (8 * sizeof(c_long) - 1))
+        max_long = -min_long - 1
+        check_format(unicode(min_long),
+                     b'%ld', c_long(min_long))
+        check_format(unicode(max_long),
+                     b'%ld', c_long(max_long))
+        max_ulong = 2 ** (8 * sizeof(c_ulong)) - 1
+        check_format(unicode(max_ulong),
+                     b'%lu', c_ulong(max_ulong))
        PyUnicode_FromFormat(b'%p', c_void_p(-1))

+        # test padding (width and/or precision)
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010i', c_int(123))
+        check_format(u'123'.rjust(100),
+                     b'%100i', c_int(123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100i', c_int(123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80i', c_int(123))
+
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010u', c_uint(123))
+        check_format(u'123'.rjust(100),
+                     b'%100u', c_uint(123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100u', c_uint(123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80u', c_uint(123))
+
+        check_format(u'123'.rjust(10, u'0'),
+                     b'%010x', c_int(0x123))
+        check_format(u'123'.rjust(100),
+                     b'%100x', c_int(0x123))
+        check_format(u'123'.rjust(100, u'0'),
+                     b'%.100x', c_int(0x123))
+        check_format(u'123'.rjust(80, u'0').rjust(100),
+                     b'%100.80x', c_int(0x123))
+
        # test %V
        check_format(u'repr=abc',
                     b'repr=%V', u'abc', b'xyz')

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 2.7.10?
 Core and Builtins
 -----------------

+- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV.  Analysis
+  and fix by Guido Vranken.
+
 - Issue #23048: Fix jumping out of an infinite while loop in the pdb.

 Library

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -735,15 +735,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
      * objects once during step 3 and put the result in an array) */
    for (f = format; *f; f++) {
         if (*f == '%') {
-             if (*(f+1)=='%')
-                 continue;
-             if (*(f+1)=='S' || *(f+1)=='R')
-                 ++callcount;
-             while (isdigit((unsigned)*f))
-                 width = (width*10) + *f++ - '0';
-             while (*++f && *f != '%' && !isalpha((unsigned)*f))
-                 ;
-             if (*f == 's')
+             f++;
+             while (*f && *f != '%' && !isalpha((unsigned)*f))
+                 f++;
+             if (*f == 's' || *f=='S' || *f=='R')
                 ++callcount;
         }
    }
@@ -760,12 +755,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
    /* step 3: figure out how large a buffer we need */
    for (f = format; *f; f++) {
        if (*f == '%') {
-            const char* p = f;
+            const char* p = f++;
            width = 0;
            while (isdigit((unsigned)*f))
                width = (width*10) + *f++ - '0';
-            while (*++f && *f != '%' && !isalpha((unsigned)*f))
-                ;
+            precision = 0;
+            if (*f == '.') {
+                f++;
+                while (isdigit((unsigned)*f))
+                    precision = (precision*10) + *f++ - '0';
+            }

            /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
             * they don't affect the amount of space we reserve.
@@ -800,6 +799,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                break;
            case 'd': case 'u': case 'i': case 'x':
                (void) va_arg(count, int);
+                if (width < precision)
+                    width = precision;
                /* 20 bytes is enough to hold a 64-bit
                   integer.  Decimal takes the most space.
                   This isn't enough for octal.