merge json library with simplejson 2.0.9 (issue 4136)

d914e3f8 · Bob Ippolito · 277859d5 · d914e3f8 · d914e3f8 · d914e3f8
Commit d914e3f8 authored Mar 17, 2009 by Bob Ippolito
13 changed files
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
-r"""A simple, fast, extensible JSON encoder and decoder
-
-JSON (JavaScript Object Notation) <http://json.org> is a subset of
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
 interchange format.

-json exposes an API familiar to uses of the standard library
-marshal and pickle modules.
+:mod:`json` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.

 Encoding basic Python object hierarchies::

@@ -32,23 +34,28 @@ Compact encoding::
    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
    '[1,2,3,{"4":5,"6":7}]'

-Pretty printing (using repr() because of extraneous whitespace in the output)::
+Pretty printing::

    >>> import json
-    >>> print repr(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
-    '{\n    "4": 5, \n    "6": 7\n}'
+    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
+    {
+        "4": 5,
+        "6": 7
+    }

 Decoding JSON::

    >>> import json
-    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
-    [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
-    >>> json.loads('"\\"foo\\bar"')
-    u'"foo\x08ar'
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
    >>> from StringIO import StringIO
    >>> io = StringIO('["streaming API"]')
-    >>> json.load(io)
-    [u'streaming API']
+    >>> json.load(io)[0] == 'streaming API'
+    True

 Specializing JSON object decoding::

@@ -61,43 +68,36 @@ Specializing JSON object decoding::
    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
    ...     object_hook=as_complex)
    (1+2j)
-    >>> import decimal
-    >>> json.loads('1.1', parse_float=decimal.Decimal)
-    Decimal('1.1')
+    >>> from decimal import Decimal
+    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
+    True

-Extending JSONEncoder::
+Specializing JSON object encoding::

    >>> import json
-    >>> class ComplexEncoder(json.JSONEncoder):
-    ...     def default(self, obj):
-    ...         if isinstance(obj, complex):
-    ...             return [obj.real, obj.imag]
-    ...         return json.JSONEncoder.default(self, obj)
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(o) + " is not JSON serializable")
    ...
-    >>> dumps(2 + 1j, cls=ComplexEncoder)
+    >>> json.dumps(2 + 1j, default=encode_complex)
    '[2.0, 1.0]'
-    >>> ComplexEncoder().encode(2 + 1j)
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
    '[2.0, 1.0]'
-    >>> list(ComplexEncoder().iterencode(2 + 1j))
-    ['[', '2.0', ', ', '1.0', ']']


-Using json.tool from the shell to validate and
-pretty-print::
+Using json.tool from the shell to validate and pretty-print::

-    $ echo '{"json":"obj"}' | python -mjson.tool
+    $ echo '{"json":"obj"}' | python -m json.tool
    {
        "json": "obj"
    }
-    $ echo '{ 1.2:3.4}' | python -mjson.tool
+    $ echo '{ 1.2:3.4}' | python -m json.tool
    Expecting property name: line 1 column 2 (char 2)
-
-Note that the JSON produced by this module's default settings
-is a subset of YAML, so it may be used as a serializer for that as well.
-
 """
-
-__version__ = '1.9'
+__version__ = '2.0.9'
 __all__ = [
    'dump', 'dumps', 'load', 'loads',
    'JSONDecoder', 'JSONEncoder',
@@ -125,28 +125,29 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).

-    If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    will be skipped instead of raising a ``TypeError``.

-    If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``
+    If ``ensure_ascii`` is false, then the some chunks written to ``fp``
    may be ``unicode`` instances, subject to normal Python ``str`` to
    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
    to cause an error.

-    If ``check_circular`` is ``False``, then the circular reference check
+    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

-    If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    in strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

-    If ``indent`` is a non-negative integer, then JSON array elements and object
-    members will be pretty-printed with that indent level. An indent level
-    of 0 will only insert newlines. ``None`` is the most compact representation.
+    If ``indent`` is a non-negative integer, then JSON array elements and
+    object members will be pretty-printed with that indent level. An indent
+    level of 0 will only insert newlines. ``None`` is the most compact
+    representation.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    then it will be used instead of the default ``(', ', ': ')`` separators.
@@ -163,8 +164,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,

    """
    # cached encoder
-    if (skipkeys is False and ensure_ascii is True and
-        check_circular is True and allow_nan is True and
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        encoding == 'utf-8' and default is None and not kw):
        iterable = _default_encoder.iterencode(obj)
@@ -186,19 +187,19 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        encoding='utf-8', default=None, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

-    If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+    If ``skipkeys`` is false then ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    will be skipped instead of raising a ``TypeError``.

-    If ``ensure_ascii`` is ``False``, then the return value will be a
+    If ``ensure_ascii`` is false, then the return value will be a
    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
    coercion rules instead of being escaped to an ASCII ``str``.

-    If ``check_circular`` is ``False``, then the circular reference check
+    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

-    If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
@@ -223,8 +224,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,

    """
    # cached encoder
-    if (skipkeys is False and ensure_ascii is True and
-        check_circular is True and allow_nan is True and
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        encoding == 'utf-8' and default is None and not kw):
        return _default_encoder.encode(obj)
@@ -242,8 +243,8 @@ _default_decoder = JSONDecoder(encoding=None, object_hook=None)

 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, **kw):
-    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
-    containing a JSON document) to a Python object.
+    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+    a JSON document) to a Python object.

    If the contents of ``fp`` is encoded with an ASCII based encoding other
    than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must

--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
--- a/Lib/json/scanner.py
+++ b/Lib/json/scanner.py
-"""Iterator based sre token scanner
-
+"""JSON token scanner
 """
-
 import re
-import sre_parse
-import sre_compile
-import sre_constants
-
-from re import VERBOSE, MULTILINE, DOTALL
-from sre_constants import BRANCH, SUBPATTERN
+try:
+    from _json import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None

-__all__ = ['Scanner', 'pattern']
+__all__ = ['make_scanner']

-FLAGS = (VERBOSE | MULTILINE | DOTALL)
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))

-class Scanner(object):
-    def __init__(self, lexicon, flags=FLAGS):
-        self.actions = [None]
-        # Combine phrases into a compound pattern
-        s = sre_parse.Pattern()
-        s.flags = flags
-        p = []
-        for idx, token in enumerate(lexicon):
-            phrase = token.pattern
-            try:
-                subpattern = sre_parse.SubPattern(s,
-                    [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
-            except sre_constants.error:
-                raise
-            p.append(subpattern)
-            self.actions.append(token)
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook

-        s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        self.scanner = sre_compile.compile(p)
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration

-    def iterscan(self, string, idx=0, context=None):
-        """Yield match, end_idx for each match
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict,
+                _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5

-        """
-        match = self.scanner.scanner(string, idx).match
-        actions = self.actions
-        lastend = idx
-        end = len(string)
-        while True:
-            m = match()
-            if m is None:
-                break
-            matchbegin, matchend = m.span()
-            if lastend == matchend:
-                break
-            action = actions[m.lastindex]
-            if action is not None:
-                rval, next_pos = action(m, context)
-                if next_pos is not None and next_pos != matchend:
-                    # "fast forward" the scanner
-                    matchend = next_pos
-                    match = self.scanner.scanner(string, matchend).match
-                yield rval, matchend
-            lastend = matchend
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration

+    return _scan_once

-def pattern(pattern, flags=FLAGS):
-    def decorator(fn):
-        fn.pattern = pattern
-        fn.regex = re.compile(pattern, flags)
-        return fn
-    return decorator
+make_scanner = c_make_scanner or py_make_scanner
--- a/Lib/json/tests/test_check_circular.py
+++ b/Lib/json/tests/test_check_circular.py
+from unittest import TestCase
+import json
+
+def default_iterable(obj):
+    return list(obj)
+
+class TestCheckCircular(TestCase):
+    def test_circular_dict(self):
+        dct = {}
+        dct['a'] = dct
+        self.assertRaises(ValueError, json.dumps, dct)
+
+    def test_circular_list(self):
+        lst = []
+        lst.append(lst)
+        self.assertRaises(ValueError, json.dumps, lst)
+
+    def test_circular_composite(self):
+        dct2 = {}
+        dct2['a'] = []
+        dct2['a'].append(dct2)
+        self.assertRaises(ValueError, json.dumps, dct2)
+
+    def test_circular_default(self):
+        json.dumps([set()], default=default_iterable)
+        self.assertRaises(TypeError, json.dumps, [set()])
+
+    def test_circular_off_default(self):
+        json.dumps([set()], default=default_iterable, check_circular=False)
+        self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
--- a/Lib/json/tests/test_decode.py
+++ b/Lib/json/tests/test_decode.py
@@ -13,3 +13,10 @@ class TestDecode(TestCase):
        rval = json.loads('1', parse_int=float)
        self.assert_(isinstance(rval, float))
        self.assertEquals(rval, 1.0)
+
+    def test_decoder_optimizations(self):
+        # Several optimizations were made that skip over calls to
+        # the whitespace regex, so this test is designed to try and
+        # exercise the uncommon cases. The array cases are already covered.
+        rval = json.loads('{   "key"    :    "value"    ,  "k":"v"    }')
+        self.assertEquals(rval, {"key":"value", "k":"v"})
--- a/Lib/json/tests/test_dump.py
+++ b/Lib/json/tests/test_dump.py
@@ -11,3 +11,11 @@ class TestDump(TestCase):

    def test_dumps(self):
        self.assertEquals(json.dumps({}), '{}')
+
+    def test_encode_truefalse(self):
+        self.assertEquals(json.dumps(
+                 {True: False, False: True}, sort_keys=True),
+                 '{"false": true, "true": false}')
+        self.assertEquals(json.dumps(
+                {2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
+                '{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
--- a/Lib/json/tests/test_encode_basestring_ascii.py
+++ b/Lib/json/tests/test_encode_basestring_ascii.py
@@ -26,10 +26,14 @@ class TestEncodeBaseStringAscii(TestCase):
        self._test_encode_basestring_ascii(json.encoder.py_encode_basestring_ascii)

    def test_c_encode_basestring_ascii(self):
+        if not json.encoder.c_encode_basestring_ascii:
+            return
        self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii)

    def _test_encode_basestring_ascii(self, encode_basestring_ascii):
        fname = encode_basestring_ascii.__name__
        for input_string, expect in CASES:
            result = encode_basestring_ascii(input_string)
-            self.assertEquals(result, expect)
+            self.assertEquals(result, expect,
+                '{0!r} != {1!r} for {2}({3!r})'.format(
+                    result, expect, fname, input_string))
--- a/Lib/json/tests/test_fail.py
+++ b/Lib/json/tests/test_fail.py
@@ -73,4 +73,4 @@ class TestFail(TestCase):
            except ValueError:
                pass
            else:
-                self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
+                self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
--- a/Lib/json/tests/test_float.py
+++ b/Lib/json/tests/test_float.py
@@ -5,5 +5,11 @@ import json

 class TestFloat(TestCase):
    def test_floats(self):
-        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]:
+        for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
            self.assertEquals(float(json.dumps(num)), num)
+            self.assertEquals(json.loads(json.dumps(num)), num)
+
+    def test_ints(self):
+        for num in [1, 1L, 1<<32, 1<<64]:
+            self.assertEquals(json.dumps(num), str(num))
+            self.assertEquals(int(json.dumps(num)), num)
--- a/Lib/json/tests/test_unicode.py
+++ b/Lib/json/tests/test_unicode.py
@@ -51,5 +51,14 @@ class TestUnicode(TestCase):
    def test_unicode_decode(self):
        for i in range(0, 0xd7ff):
            u = unichr(i)
-            js = '"\\u{0:04x}"'.format(i)
-            self.assertEquals(json.loads(js), u)
+            s = '"\\u{0:04x}"'.format(i)
+            self.assertEquals(json.loads(s), u)
+
+    def test_default_encoding(self):
+        self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+            {'a': u'\xe9'})
+
+    def test_unicode_preservation(self):
+        self.assertEquals(type(json.loads(u'""')), unicode)
+        self.assertEquals(type(json.loads(u'"a"')), unicode)
+        self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
--- a/Lib/json/tool.py
+++ b/Lib/json/tool.py
@@ -2,11 +2,11 @@ r"""Command-line tool to validate and pretty-print JSON

 Usage::

-    $ echo '{"json":"obj"}' | python -mjson.tool
+    $ echo '{"json":"obj"}' | python -m json.tool
    {
        "json": "obj"
    }
-    $ echo '{ 1.2:3.4}' | python -mjson.tool
+    $ echo '{ 1.2:3.4}' | python -m json.tool
    Expecting property name: line 1 column 2 (char 2)

 """
@@ -24,7 +24,7 @@ def main():
        infile = open(sys.argv[1], 'rb')
        outfile = open(sys.argv[2], 'wb')
    else:
-        raise SystemExit("{0} [infile [outfile]]".format(sys.argv[0]))
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
    try:
        obj = json.load(infile)
    except ValueError, e:

--- a/Modules/_json.c
+++ b/Modules/_json.c