merge heads

c53d21f8 · Georg Brandl · 7f568bf8 · b6b7a0eb · c53d21f8 · c53d21f8
Commit c53d21f8 authored Oct 28, 2012 by Georg Brandl
7 changed files
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -116,7 +116,10 @@ Using json.tool from the shell to validate and pretty-print::
 Basic Usage
 -----------
-.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, **kw)
+.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \
+                   check_circular=True, allow_nan=True, cls=None, \
+                   indent=None, separators=None, default=None, \
+                   sort_keys=False, **kw)
   Serialize *obj* as a JSON formatted stream to *fp* (a ``.write()``-supporting
   :term:`file-like object`).
@@ -159,12 +162,18 @@ Basic Usage
   *default(obj)* is a function that should return a serializable version of
   *obj* or raise :exc:`TypeError`.  The default simply raises :exc:`TypeError`.
+   If *sort_keys* is ``True`` (default: ``False``), then the output of
+   dictionaries will be sorted by key.
   To use a custom :class:`JSONEncoder` subclass (e.g. one that overrides the
   :meth:`default` method to serialize additional types), specify it with the
   *cls* kwarg; otherwise :class:`JSONEncoder` is used.
-.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, **kw)
+.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \
+                    check_circular=True, allow_nan=True, cls=None, \
+                    indent=None, separators=None, default=None, \
+                    sort_keys=False, **kw)
   Serialize *obj* to a JSON formatted :class:`str`.  The arguments have the
   same meaning as in :func:`dump`.

--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -122,7 +122,7 @@ _default_encoder = JSONEncoder(
 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, **kw):
+        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).
@@ -155,6 +155,9 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.
+    If ``sort_keys`` is ``True`` (default: ``False``), then the output of
+    dictionaries will be sorted by key.
    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
@@ -164,7 +167,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
-        default is None and not kw):
+        default is None and not sort_keys and not kw):
        iterable = _default_encoder.iterencode(obj)
    else:
        if cls is None:
@@ -172,7 +175,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
            separators=separators,
-            default=default, **kw).iterencode(obj)
+            default=default, sort_keys=sort_keys, **kw).iterencode(obj)
    # could accelerate with writelines in some versions of Python, at
    # a debuggability cost
    for chunk in iterable:
@@ -181,7 +184,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, **kw):
+        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.
    If ``skipkeys`` is false then ``dict`` keys that are not basic types
@@ -213,6 +216,9 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.
+    If ``sort_keys`` is ``True`` (default: ``False``), then the output of
+    dictionaries will be sorted by key.
    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
@@ -222,14 +228,14 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
-        default is None and not kw):
+        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-        separators=separators, default=default,
+        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)

--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -906,6 +906,21 @@ class UnicodeTest(string_tests.CommonTest,
        self.assertRaises(ValueError, '{}'.format_map, 'a')
        self.assertRaises(ValueError, '{a} {}'.format_map, {"a" : 2, "b" : 1})
+    def test_format_huge_precision(self):  # format() given a precision larger than sys.maxsize
+        format_string = ".{}f".format(sys.maxsize + 1)  # spec of the form ".<sys.maxsize + 1>f"
+        with self.assertRaises(ValueError):  # the oversized precision must be rejected
+            result = format(2.34, format_string)  # result is unused; only the exception matters
+    def test_format_huge_width(self):  # format() given a field width larger than sys.maxsize
+        format_string = "{}f".format(sys.maxsize + 1)  # spec of the form "<sys.maxsize + 1>f"
+        with self.assertRaises(ValueError):  # the oversized width must be rejected
+            result = format(2.34, format_string)  # result is unused; only the exception matters
+    def test_format_huge_item_number(self):  # str.format() given a positional index larger than sys.maxsize
+        format_string = "{{{}:.6f}}".format(sys.maxsize + 1)  # yields "{<sys.maxsize + 1>:.6f}"; doubled braces are literal
+        with self.assertRaises(ValueError):  # the oversized field index must be rejected
+            result = format_string.format(2.34)  # result is unused; only the exception matters
    def test_format_auto_numbering(self):
        class C:
            def __init__(self, x=100):
@@ -990,6 +1005,18 @@ class UnicodeTest(string_tests.CommonTest,
        self.assertEqual('%f' % INF, 'inf')
        self.assertEqual('%F' % INF, 'INF')
+    @support.cpython_only
+    def test_formatting_huge_precision(self):  # %-formatting given a precision larger than INT_MAX
+        from _testcapi import INT_MAX  # imported locally; the test is marked cpython_only
+        format_string = "%.{}f".format(INT_MAX + 1)  # yields "%.<INT_MAX + 1>f"
+        with self.assertRaises(ValueError):  # the oversized precision must be rejected
+            result = format_string % 2.34  # result is unused; only the exception matters
+    def test_formatting_huge_width(self):  # %-formatting given a field width larger than sys.maxsize
+        format_string = "%{}f".format(sys.maxsize + 1)  # yields "%<sys.maxsize + 1>f"
+        with self.assertRaises(ValueError):  # the oversized width must be rejected
+            result = format_string % 2.34  # result is unused; only the exception matters
    def test_startswith_endswith_errors(self):
        for meth in ('foo'.startswith, 'foo'.endswith):
            with self.assertRaises(TypeError) as cm:

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------
+- Issue #14700: Fix buggy overflow checks when handling large precisions and
+  widths in old-style and new-style formatting.
 - Issue #6074: Ensure cached bytecode files can always be updated by the
  user that created them, even when the source file is read-only.

--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -73,7 +73,7 @@ static int
 get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
                  Py_ssize_t *result)
 {
-    Py_ssize_t accumulator, digitval, oldaccumulator;
+    Py_ssize_t accumulator, digitval;
    int numdigits;
    accumulator = numdigits = 0;
    for (;;(*ptr)++, numdigits++) {
@@ -83,19 +83,17 @@ get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
        if (digitval < 0)
            break;
        /*
-           This trick was copied from old Unicode format code.  It's cute,
+           Detect possible overflow before it happens:
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
-           expect too many digits.
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
        */
-        oldaccumulator = accumulator;
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
            PyErr_Format(PyExc_ValueError,
                         "Too many decimal digits in format string");
            return -1;
        }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
    }
    *result = accumulator;
    return numdigits;

--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -197,7 +197,6 @@ get_integer(const SubString *str)
 {
    Py_ssize_t accumulator = 0;
    Py_ssize_t digitval;
-    Py_ssize_t oldaccumulator;
    STRINGLIB_CHAR *p;
    /* empty string is an error */
@@ -209,19 +208,17 @@ get_integer(const SubString *str)
        if (digitval < 0)
            return -1;
        /*
-           This trick was copied from old Unicode format code.  It's cute,
+           Detect possible overflow before it happens:
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
-           expect too many digits.
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
        */
-        oldaccumulator = accumulator;
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
            PyErr_Format(PyExc_ValueError,
                         "Too many decimal digits in format string");
            return -1;
        }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
    }
    return accumulator;
 }

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9648,7 +9648,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                    c = *fmt++;
                    if (c < '0' || c > '9')
                        break;
-                    if ((width*10) / 10 != width) {
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                        PyErr_SetString(PyExc_ValueError,
                                        "width too big");
                        goto onError;
@@ -9683,7 +9683,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                        c = *fmt++;
                        if (c < '0' || c > '9')
                            break;
-                        if ((prec*10) / 10 != prec) {
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                            PyErr_SetString(PyExc_ValueError,
                                            "prec too big");
                            goto onError;