Commit d914e3f8 authored by Bob Ippolito's avatar Bob Ippolito

merge json library with simplejson 2.0.9 (issue 4136)

parent 277859d5
r"""A simple, fast, extensible JSON encoder and decoder
JSON (JavaScript Object Notation) <http://json.org> is a subset of
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format.
json exposes an API familiar to uses of the standard library
marshal and pickle modules.
:mod:`json` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
Encoding basic Python object hierarchies::
......@@ -32,23 +34,28 @@ Compact encoding::
>>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
'[1,2,3,{"4":5,"6":7}]'
Pretty printing (using repr() because of extraneous whitespace in the output)::
Pretty printing::
>>> import json
>>> print repr(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4))
'{\n "4": 5, \n "6": 7\n}'
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
>>> print '\n'.join([l.rstrip() for l in s.splitlines()])
{
"4": 5,
"6": 7
}
Decoding JSON::
>>> import json
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
[u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
>>> json.loads('"\\"foo\\bar"')
u'"foo\x08ar'
>>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
True
>>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
True
>>> from StringIO import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)
[u'streaming API']
>>> json.load(io)[0] == 'streaming API'
True
Specializing JSON object decoding::
......@@ -61,43 +68,36 @@ Specializing JSON object decoding::
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex)
(1+2j)
>>> import decimal
>>> json.loads('1.1', parse_float=decimal.Decimal)
Decimal('1.1')
>>> from decimal import Decimal
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
True
Extending JSONEncoder::
Specializing JSON object encoding::
>>> import json
>>> class ComplexEncoder(json.JSONEncoder):
... def default(self, obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... return json.JSONEncoder.default(self, obj)
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... raise TypeError(repr(o) + " is not JSON serializable")
...
>>> dumps(2 + 1j, cls=ComplexEncoder)
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
>>> ComplexEncoder().encode(2 + 1j)
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
'[2.0, 1.0]'
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]'
>>> list(ComplexEncoder().iterencode(2 + 1j))
['[', '2.0', ', ', '1.0', ']']
Using json.tool from the shell to validate and
pretty-print::
Using json.tool from the shell to validate and pretty-print::
$ echo '{"json":"obj"}' | python -mjson.tool
$ echo '{"json":"obj"}' | python -m json.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -mjson.tool
$ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name: line 1 column 2 (char 2)
Note that the JSON produced by this module's default settings
is a subset of YAML, so it may be used as a serializer for that as well.
"""
__version__ = '1.9'
__version__ = '2.0.9'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder',
......@@ -125,28 +125,29 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``
If ``ensure_ascii`` is false, then the some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error.
If ``check_circular`` is ``False``, then the circular reference check
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and object
members will be pretty-printed with that indent level. An indent level
of 0 will only insert newlines. ``None`` is the most compact representation.
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
level of 0 will only insert newlines. ``None`` is the most compact
representation.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
......@@ -163,8 +164,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (skipkeys is False and ensure_ascii is True and
check_circular is True and allow_nan is True and
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw):
iterable = _default_encoder.iterencode(obj)
......@@ -186,19 +187,19 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
encoding='utf-8', default=None, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
If ``skipkeys`` is false then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is ``False``, then the return value will be a
If ``ensure_ascii`` is false, then the return value will be a
``unicode`` instance subject to normal Python ``str`` to ``unicode``
coercion rules instead of being escaped to an ASCII ``str``.
If ``check_circular`` is ``False``, then the circular reference check
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
......@@ -223,8 +224,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (skipkeys is False and ensure_ascii is True and
check_circular is True and allow_nan is True and
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and not kw):
return _default_encoder.encode(obj)
......@@ -242,8 +243,8 @@ _default_decoder = JSONDecoder(encoding=None, object_hook=None)
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object
containing a JSON document) to a Python object.
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
If the contents of ``fp`` is encoded with an ASCII based encoding other
than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
......
This diff is collapsed.
This diff is collapsed.
"""Iterator based sre token scanner
"""JSON token scanner
"""
import re
import sre_parse
import sre_compile
import sre_constants
from re import VERBOSE, MULTILINE, DOTALL
from sre_constants import BRANCH, SUBPATTERN
try:
from _json import make_scanner as c_make_scanner
except ImportError:
c_make_scanner = None
__all__ = ['Scanner', 'pattern']
__all__ = ['make_scanner']
FLAGS = (VERBOSE | MULTILINE | DOTALL)
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
class Scanner(object):
def __init__(self, lexicon, flags=FLAGS):
self.actions = [None]
# Combine phrases into a compound pattern
s = sre_parse.Pattern()
s.flags = flags
p = []
for idx, token in enumerate(lexicon):
phrase = token.pattern
try:
subpattern = sre_parse.SubPattern(s,
[(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
except sre_constants.error:
raise
p.append(subpattern)
self.actions.append(token)
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
parse_string = context.parse_string
match_number = NUMBER_RE.match
encoding = context.encoding
strict = context.strict
parse_float = context.parse_float
parse_int = context.parse_int
parse_constant = context.parse_constant
object_hook = context.object_hook
s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
self.scanner = sre_compile.compile(p)
def _scan_once(string, idx):
try:
nextchar = string[idx]
except IndexError:
raise StopIteration
def iterscan(self, string, idx=0, context=None):
"""Yield match, end_idx for each match
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), encoding, strict,
_scan_once, object_hook)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
return True, idx + 4
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
return False, idx + 5
"""
match = self.scanner.scanner(string, idx).match
actions = self.actions
lastend = idx
end = len(string)
while True:
m = match()
if m is None:
break
matchbegin, matchend = m.span()
if lastend == matchend:
break
action = actions[m.lastindex]
if action is not None:
rval, next_pos = action(m, context)
if next_pos is not None and next_pos != matchend:
# "fast forward" the scanner
matchend = next_pos
match = self.scanner.scanner(string, matchend).match
yield rval, matchend
lastend = matchend
m = match_number(string, idx)
if m is not None:
integer, frac, exp = m.groups()
if frac or exp:
res = parse_float(integer + (frac or '') + (exp or ''))
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
return _scan_once
def pattern(pattern, flags=FLAGS):
def decorator(fn):
fn.pattern = pattern
fn.regex = re.compile(pattern, flags)
return fn
return decorator
make_scanner = c_make_scanner or py_make_scanner
from unittest import TestCase
import json
def default_iterable(obj):
return list(obj)
class TestCheckCircular(TestCase):
def test_circular_dict(self):
dct = {}
dct['a'] = dct
self.assertRaises(ValueError, json.dumps, dct)
def test_circular_list(self):
lst = []
lst.append(lst)
self.assertRaises(ValueError, json.dumps, lst)
def test_circular_composite(self):
dct2 = {}
dct2['a'] = []
dct2['a'].append(dct2)
self.assertRaises(ValueError, json.dumps, dct2)
def test_circular_default(self):
json.dumps([set()], default=default_iterable)
self.assertRaises(TypeError, json.dumps, [set()])
def test_circular_off_default(self):
json.dumps([set()], default=default_iterable, check_circular=False)
self.assertRaises(TypeError, json.dumps, [set()], check_circular=False)
......@@ -13,3 +13,10 @@ class TestDecode(TestCase):
rval = json.loads('1', parse_int=float)
self.assert_(isinstance(rval, float))
self.assertEquals(rval, 1.0)
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = json.loads('{ "key" : "value" , "k":"v" }')
self.assertEquals(rval, {"key":"value", "k":"v"})
......@@ -11,3 +11,11 @@ class TestDump(TestCase):
def test_dumps(self):
self.assertEquals(json.dumps({}), '{}')
def test_encode_truefalse(self):
self.assertEquals(json.dumps(
{True: False, False: True}, sort_keys=True),
'{"false": true, "true": false}')
self.assertEquals(json.dumps(
{2: 3.0, 4.0: 5L, False: 1, 6L: True, "7": 0}, sort_keys=True),
'{"false": 1, "2": 3.0, "4.0": 5, "6": true, "7": 0}')
......@@ -26,10 +26,14 @@ class TestEncodeBaseStringAscii(TestCase):
self._test_encode_basestring_ascii(json.encoder.py_encode_basestring_ascii)
def test_c_encode_basestring_ascii(self):
if not json.encoder.c_encode_basestring_ascii:
return
self._test_encode_basestring_ascii(json.encoder.c_encode_basestring_ascii)
def _test_encode_basestring_ascii(self, encode_basestring_ascii):
fname = encode_basestring_ascii.__name__
for input_string, expect in CASES:
result = encode_basestring_ascii(input_string)
self.assertEquals(result, expect)
self.assertEquals(result, expect,
'{0!r} != {1!r} for {2}({3!r})'.format(
result, expect, fname, input_string))
......@@ -73,4 +73,4 @@ class TestFail(TestCase):
except ValueError:
pass
else:
self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
......@@ -5,5 +5,11 @@ import json
class TestFloat(TestCase):
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100]:
for num in [1617161771.7650001, math.pi, math.pi**100, math.pi**-100, 3.1]:
self.assertEquals(float(json.dumps(num)), num)
self.assertEquals(json.loads(json.dumps(num)), num)
def test_ints(self):
for num in [1, 1L, 1<<32, 1<<64]:
self.assertEquals(json.dumps(num), str(num))
self.assertEquals(int(json.dumps(num)), num)
......@@ -51,5 +51,14 @@ class TestUnicode(TestCase):
def test_unicode_decode(self):
for i in range(0, 0xd7ff):
u = unichr(i)
js = '"\\u{0:04x}"'.format(i)
self.assertEquals(json.loads(js), u)
s = '"\\u{0:04x}"'.format(i)
self.assertEquals(json.loads(s), u)
def test_default_encoding(self):
self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
{'a': u'\xe9'})
def test_unicode_preservation(self):
self.assertEquals(type(json.loads(u'""')), unicode)
self.assertEquals(type(json.loads(u'"a"')), unicode)
self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
......@@ -2,11 +2,11 @@ r"""Command-line tool to validate and pretty-print JSON
Usage::
$ echo '{"json":"obj"}' | python -mjson.tool
$ echo '{"json":"obj"}' | python -m json.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -mjson.tool
$ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name: line 1 column 2 (char 2)
"""
......@@ -24,7 +24,7 @@ def main():
infile = open(sys.argv[1], 'rb')
outfile = open(sys.argv[2], 'wb')
else:
raise SystemExit("{0} [infile [outfile]]".format(sys.argv[0]))
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
try:
obj = json.load(infile)
except ValueError, e:
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment