Commit 2bef1b6b authored by Serhiy Storchaka

Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis and fix by Guido Vranken.
parent 8eca47ea
...@@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1661,7 +1661,10 @@ class UnicodeTest(string_tests.CommonTest,
# Test PyUnicode_FromFormat() # Test PyUnicode_FromFormat()
def test_from_format(self): def test_from_format(self):
support.import_module('ctypes') support.import_module('ctypes')
from ctypes import pythonapi, py_object, c_int from ctypes import (
pythonapi, py_object, sizeof,
c_int, c_long, c_longlong, c_ssize_t,
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
if sys.maxunicode == 65535: if sys.maxunicode == 65535:
name = "PyUnicodeUCS2_FromFormat" name = "PyUnicodeUCS2_FromFormat"
else: else:
...@@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1675,9 +1678,13 @@ class UnicodeTest(string_tests.CommonTest,
for arg in args) for arg in args)
return _PyUnicode_FromFormat(format, *cargs) return _PyUnicode_FromFormat(format, *cargs)
def check_format(expected, format, *args):
text = PyUnicode_FromFormat(format, *args)
self.assertEqual(expected, text)
# ascii format, non-ascii argument # ascii format, non-ascii argument
text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9') check_format('ascii\x7f=unicode\xe9',
self.assertEqual(text, 'ascii\x7f=unicode\xe9') b'ascii\x7f=%U', 'unicode\xe9')
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV() # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
# raises an error # raises an error
...@@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTest, ...@@ -1686,25 +1693,131 @@ class UnicodeTest(string_tests.CommonTest,
'string, got a non-ASCII byte: 0xe9$', 'string, got a non-ASCII byte: 0xe9$',
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd') # test "%c"
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') check_format('\uabcd',
b'%c', c_int(0xabcd))
# other tests check_format('\U0010ffff',
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') b'%c', c_int(0x10ffff))
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'") with self.assertRaises(OverflowError):
PyUnicode_FromFormat(b'%c', c_int(0x110000))
text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz') # Issue #18183
self.assertEqual(text, 'repr=abc') check_format('\U00010000\U00100000',
b'%c%c', c_int(0x10000), c_int(0x100000))
# test "%"
check_format('%',
b'%')
check_format('%',
b'%%')
check_format('%s',
b'%%s')
check_format('[%]',
b'[%%]')
check_format('%abc',
b'%%%s', b'abc')
# test %S
check_format("repr=\u20acABC",
b'repr=%S', '\u20acABC')
# test %R
check_format("repr='\u20acABC'",
b'repr=%R', '\u20acABC')
# test integer formats (%i, %d, %u)
check_format('010',
b'%03i', c_int(10))
check_format('0010',
b'%0.4i', c_int(10))
check_format('-123',
b'%i', c_int(-123))
check_format('-123',
b'%d', c_int(-123))
check_format('-123',
b'%ld', c_long(-123))
check_format('-123',
b'%lld', c_longlong(-123))
check_format('-123',
b'%zd', c_ssize_t(-123))
check_format('123',
b'%u', c_uint(123))
check_format('123',
b'%lu', c_ulong(123))
check_format('123',
b'%llu', c_ulonglong(123))
check_format('123',
b'%zu', c_size_t(123))
# test long output
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
max_longlong = -min_longlong - 1
check_format(str(min_longlong),
b'%lld', c_longlong(min_longlong))
check_format(str(max_longlong),
b'%lld', c_longlong(max_longlong))
max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
check_format(str(max_ulonglong),
b'%llu', c_ulonglong(max_ulonglong))
PyUnicode_FromFormat(b'%p', c_void_p(-1))
# test padding (width and/or precision)
check_format('123'.rjust(10, '0'),
b'%010i', c_int(123))
check_format('123'.rjust(100),
b'%100i', c_int(123))
check_format('123'.rjust(100, '0'),
b'%.100i', c_int(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80i', c_int(123))
check_format('123'.rjust(10, '0'),
b'%010u', c_uint(123))
check_format('123'.rjust(100),
b'%100u', c_uint(123))
check_format('123'.rjust(100, '0'),
b'%.100u', c_uint(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80u', c_uint(123))
check_format('123'.rjust(10, '0'),
b'%010x', c_int(0x123))
check_format('123'.rjust(100),
b'%100x', c_int(0x123))
check_format('123'.rjust(100, '0'),
b'%.100x', c_int(0x123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80x', c_int(0x123))
# test %A
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
# test %V
check_format('repr=abc',
b'repr=%V', 'abc', b'xyz')
# Test string decode from parameter of %s using utf-8. # Test string decode from parameter of %s using utf-8.
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
# '\u4eba\u6c11' # '\u4eba\u6c11'
text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91') check_format('repr=\u4eba\u6c11',
self.assertEqual(text, 'repr=\u4eba\u6c11') b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
#Test replace error handler. #Test replace error handler.
text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') check_format('repr=abc\ufffd',
self.assertEqual(text, 'repr=abc\ufffd') b'repr=%V', None, b'abc\xff')
# not supported: copy the raw format string. These tests are just here
# to check for crashes and should not be considered as specifications
check_format('%s',
b'%1%s', b'abc')
check_format('%1abc',
b'%1abc')
check_format('%+i',
b'%+i', c_int(10))
check_format('%s',
b'%.%s', b'abc')
# Test PyUnicode_AsWideChar() # Test PyUnicode_AsWideChar()
def test_aswidechar(self): def test_aswidechar(self):
......
...@@ -2,6 +2,18 @@ ...@@ -2,6 +2,18 @@
Python News Python News
+++++++++++ +++++++++++
What's New in Python 3.2.7?
============================
*Release date: XXXX-XX-XX*
Core and Builtins
-----------------
- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
What's New in Python 3.2.6? What's New in Python 3.2.6?
=========================== ===========================
......
...@@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -759,15 +759,10 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
* result in an array) */ * result in an array) */
for (f = format; *f; f++) { for (f = format; *f; f++) {
if (*f == '%') { if (*f == '%') {
if (*(f+1)=='%') f++;
continue; while (*f && *f != '%' && !Py_ISALPHA((unsigned)*f))
if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V') f++;
++callcount; if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
while (Py_ISDIGIT((unsigned)*f))
width = (width*10) + *f++ - '0';
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
;
if (*f == 's')
++callcount; ++callcount;
} }
else if (128 <= (unsigned char)*f) { else if (128 <= (unsigned char)*f) {
...@@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -794,12 +789,16 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
#ifdef HAVE_LONG_LONG #ifdef HAVE_LONG_LONG
int longlongflag = 0; int longlongflag = 0;
#endif #endif
const char* p = f; const char* p = f++;
width = 0; width = 0;
while (Py_ISDIGIT((unsigned)*f)) while (Py_ISDIGIT((unsigned)*f))
width = (width*10) + *f++ - '0'; width = (width*10) + *f++ - '0';
while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)) precision = 0;
; if (*f == '.') {
f++;
while (Py_ISDIGIT((unsigned)*f))
precision = (precision*10) + *f++ - '0';
}
/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
* they don't affect the amount of space we reserve. * they don't affect the amount of space we reserve.
...@@ -823,16 +822,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -823,16 +822,18 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
switch (*f) { switch (*f) {
case 'c': case 'c':
{ {
#ifndef Py_UNICODE_WIDE
int ordinal = va_arg(count, int); int ordinal = va_arg(count, int);
if (ordinal < 0 || ordinal > 0x10ffff) {
PyErr_SetString(PyExc_OverflowError,
"%c arg not in range(0x110000)");
goto fail;
}
#ifndef Py_UNICODE_WIDE
if (ordinal > 0xffff) if (ordinal > 0xffff)
n += 2; n += 2;
else else
n++;
#else
(void)va_arg(count, int);
n++;
#endif #endif
n++;
break; break;
} }
case '%': case '%':
...@@ -840,6 +841,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) ...@@ -840,6 +841,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
break; break;
case 'd': case 'u': case 'i': case 'x': case 'd': case 'u': case 'i': case 'x':
(void) va_arg(count, int); (void) va_arg(count, int);
if (width < precision)
width = precision;
#ifdef HAVE_LONG_LONG #ifdef HAVE_LONG_LONG
if (longlongflag) { if (longlongflag) {
if (width < MAX_LONG_LONG_CHARS) if (width < MAX_LONG_LONG_CHARS)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment