Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
df7fc9dd
Commit
df7fc9dd
authored
Aug 19, 2011
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #12778: Reduce memory consumption when JSON-encoding a large container of many small objects.
parent
9bb6dfe0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
175 additions
and
50 deletions
+175
-50
Lib/test/json_tests/test_dump.py
Lib/test/json_tests/test_dump.py
+18
-1
Misc/NEWS
Misc/NEWS
+3
-0
Modules/_json.c
Modules/_json.c
+154
-49
No files found.
Lib/test/json_tests/test_dump.py
View file @
df7fc9dd
from
io
import
StringIO
from
test.json_tests
import
PyTest
,
CTest
from
test.support
import
precisionbigmemtest
,
_1G
class
TestDump
:
def
test_dump
(
self
):
...
...
@@ -21,4 +22,20 @@ class TestDump:
class
TestPyDump
(
TestDump
,
PyTest
):
pass
class
TestCDump
(
TestDump
,
CTest
):
pass
class
TestCDump
(
TestDump
,
CTest
):
# The size requirement here is hopefully over-estimated (actual
# memory consumption depending on implementation details, and also
# system memory management, since this may allocate a lot of
# small objects).
@
precisionbigmemtest
(
size
=
_1G
,
memuse
=
1
)
def
test_large_list
(
self
,
size
):
N
=
int
(
30
*
1024
*
1024
*
(
size
/
_1G
))
l
=
[
1
]
*
N
encoded
=
self
.
dumps
(
l
)
self
.
assertEqual
(
len
(
encoded
),
N
*
3
)
self
.
assertEqual
(
encoded
[:
1
],
"["
)
self
.
assertEqual
(
encoded
[
-
2
:],
"1]"
)
self
.
assertEqual
(
encoded
[
1
:
-
2
],
"1, "
*
(
N
-
1
))
Misc/NEWS
View file @
df7fc9dd
...
...
@@ -262,6 +262,9 @@ Core and Builtins
Library
-------
- Issue #12778: Reduce memory consumption when JSON-encoding a large
container of many small objects.
- Issue #12650: Fix a race condition where a subprocess.Popen could leak
resources (FD/zombie) when killed at the wrong time.
...
...
Modules/_json.c
View file @
df7fc9dd
...
...
@@ -75,6 +75,121 @@ static PyMemberDef encoder_members[] = {
{
NULL
}
};
/*
* A two-level accumulator of unicode objects that avoids both the overhead
* of keeping a huge number of small separate objects, and the quadratic
* behaviour of using a naive repeated concatenation scheme.
*/
typedef
struct
{
PyObject
*
large
;
/* A list of previously accumulated large strings */
PyObject
*
small
;
/* Pending small strings */
}
accumulator
;
static
PyObject
*
join_list_unicode
(
PyObject
*
lst
)
{
/* return u''.join(lst) */
static
PyObject
*
sep
=
NULL
;
if
(
sep
==
NULL
)
{
sep
=
PyUnicode_FromStringAndSize
(
""
,
0
);
if
(
sep
==
NULL
)
return
NULL
;
}
return
PyUnicode_Join
(
sep
,
lst
);
}
static
int
init_accumulator
(
accumulator
*
acc
)
{
acc
->
large
=
PyList_New
(
0
);
if
(
acc
->
large
==
NULL
)
return
-
1
;
acc
->
small
=
PyList_New
(
0
);
if
(
acc
->
small
==
NULL
)
{
Py_CLEAR
(
acc
->
large
);
return
-
1
;
}
return
0
;
}
static
int
flush_accumulator
(
accumulator
*
acc
)
{
Py_ssize_t
nsmall
=
PyList_GET_SIZE
(
acc
->
small
);
if
(
nsmall
)
{
int
ret
;
PyObject
*
joined
=
join_list_unicode
(
acc
->
small
);
if
(
joined
==
NULL
)
return
-
1
;
if
(
PyList_SetSlice
(
acc
->
small
,
0
,
nsmall
,
NULL
))
{
Py_DECREF
(
joined
);
return
-
1
;
}
ret
=
PyList_Append
(
acc
->
large
,
joined
);
Py_DECREF
(
joined
);
return
ret
;
}
return
0
;
}
static
int
accumulate_unicode
(
accumulator
*
acc
,
PyObject
*
obj
)
{
int
ret
;
Py_ssize_t
nsmall
;
assert
(
PyUnicode_Check
(
obj
));
if
(
PyList_Append
(
acc
->
small
,
obj
))
return
-
1
;
nsmall
=
PyList_GET_SIZE
(
acc
->
small
);
/* Each item in a list of unicode objects has an overhead (in 64-bit
* builds) of:
* - 8 bytes for the list slot
* - 56 bytes for the header of the unicode object
* that is, 64 bytes. 100000 such objects waste more than 6MB
* compared to a single concatenated string.
*/
if
(
nsmall
<
100000
)
return
0
;
PyObject
*
joined
=
join_list_unicode
(
acc
->
small
);
if
(
joined
==
NULL
)
return
-
1
;
if
(
PyList_SetSlice
(
acc
->
small
,
0
,
nsmall
,
NULL
))
{
Py_DECREF
(
joined
);
return
-
1
;
}
ret
=
PyList_Append
(
acc
->
large
,
joined
);
Py_DECREF
(
joined
);
return
ret
;
}
static
PyObject
*
finish_accumulator
(
accumulator
*
acc
)
{
int
ret
;
PyObject
*
res
;
ret
=
flush_accumulator
(
acc
);
Py_CLEAR
(
acc
->
small
);
if
(
ret
)
{
Py_CLEAR
(
acc
->
large
);
return
NULL
;
}
res
=
acc
->
large
;
acc
->
large
=
NULL
;
return
res
;
}
static
void
destroy_accumulator
(
accumulator
*
acc
)
{
Py_CLEAR
(
acc
->
small
);
Py_CLEAR
(
acc
->
large
);
}
/* Forward decls */
static
PyObject
*
ascii_escape_unicode
(
PyObject
*
pystr
);
static
PyObject
*
...
...
@@ -101,11 +216,11 @@ encoder_dealloc(PyObject *self);
static
int
encoder_clear
(
PyObject
*
self
);
static
int
encoder_listencode_list
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
seq
,
Py_ssize_t
indent_level
);
encoder_listencode_list
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
seq
,
Py_ssize_t
indent_level
);
static
int
encoder_listencode_obj
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
obj
,
Py_ssize_t
indent_level
);
encoder_listencode_obj
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
obj
,
Py_ssize_t
indent_level
);
static
int
encoder_listencode_dict
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
dct
,
Py_ssize_t
indent_level
);
encoder_listencode_dict
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
dct
,
Py_ssize_t
indent_level
);
static
PyObject
*
_encoded_const
(
PyObject
*
obj
);
static
void
...
...
@@ -266,19 +381,6 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
}
}
static
PyObject
*
join_list_unicode
(
PyObject
*
lst
)
{
/* return u''.join(lst) */
static
PyObject
*
sep
=
NULL
;
if
(
sep
==
NULL
)
{
sep
=
PyUnicode_FromStringAndSize
(
""
,
0
);
if
(
sep
==
NULL
)
return
NULL
;
}
return
PyUnicode_Join
(
sep
,
lst
);
}
static
PyObject
*
_build_rval_index_tuple
(
PyObject
*
rval
,
Py_ssize_t
idx
)
{
/* return (rval, idx) tuple, stealing reference to rval */
...
...
@@ -1226,22 +1328,22 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
/* Python callable interface to encode_listencode_obj */
static
char
*
kwlist
[]
=
{
"obj"
,
"_current_indent_level"
,
NULL
};
PyObject
*
obj
;
PyObject
*
rval
;
Py_ssize_t
indent_level
;
PyEncoderObject
*
s
;
accumulator
acc
;
assert
(
PyEncoder_Check
(
self
));
s
=
(
PyEncoderObject
*
)
self
;
if
(
!
PyArg_ParseTupleAndKeywords
(
args
,
kwds
,
"OO&:_iterencode"
,
kwlist
,
&
obj
,
_convertPyInt_AsSsize_t
,
&
indent_level
))
return
NULL
;
rval
=
PyList_New
(
0
);
if
(
rval
==
NULL
)
if
(
init_accumulator
(
&
acc
))
return
NULL
;
if
(
encoder_listencode_obj
(
s
,
rval
,
obj
,
indent_level
))
{
Py_DECREF
(
rval
);
if
(
encoder_listencode_obj
(
s
,
&
acc
,
obj
,
indent_level
))
{
destroy_accumulator
(
&
acc
);
return
NULL
;
}
return
rval
;
return
finish_accumulator
(
&
acc
)
;
}
static
PyObject
*
...
...
@@ -1313,18 +1415,19 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
}
static
int
_steal_
list_append
(
PyObject
*
lst
,
PyObject
*
stolen
)
_steal_
accumulate
(
accumulator
*
acc
,
PyObject
*
stolen
)
{
/* Append stolen and then decrement its reference count */
int
rval
=
PyList_Append
(
lst
,
stolen
);
int
rval
=
accumulate_unicode
(
acc
,
stolen
);
Py_DECREF
(
stolen
);
return
rval
;
}
static
int
encoder_listencode_obj
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
obj
,
Py_ssize_t
indent_level
)
encoder_listencode_obj
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
obj
,
Py_ssize_t
indent_level
)
{
/* Encode Python object obj to a JSON term
, rval is a PyList
*/
/* Encode Python object obj to a JSON term */
PyObject
*
newobj
;
int
rv
;
...
...
@@ -1332,38 +1435,38 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
PyObject
*
cstr
=
_encoded_const
(
obj
);
if
(
cstr
==
NULL
)
return
-
1
;
return
_steal_
list_append
(
rval
,
cstr
);
return
_steal_
accumulate
(
acc
,
cstr
);
}
else
if
(
PyUnicode_Check
(
obj
))
{
PyObject
*
encoded
=
encoder_encode_string
(
s
,
obj
);
if
(
encoded
==
NULL
)
return
-
1
;
return
_steal_
list_append
(
rval
,
encoded
);
return
_steal_
accumulate
(
acc
,
encoded
);
}
else
if
(
PyLong_Check
(
obj
))
{
PyObject
*
encoded
=
PyObject_Str
(
obj
);
if
(
encoded
==
NULL
)
return
-
1
;
return
_steal_
list_append
(
rval
,
encoded
);
return
_steal_
accumulate
(
acc
,
encoded
);
}
else
if
(
PyFloat_Check
(
obj
))
{
PyObject
*
encoded
=
encoder_encode_float
(
s
,
obj
);
if
(
encoded
==
NULL
)
return
-
1
;
return
_steal_
list_append
(
rval
,
encoded
);
return
_steal_
accumulate
(
acc
,
encoded
);
}
else
if
(
PyList_Check
(
obj
)
||
PyTuple_Check
(
obj
))
{
if
(
Py_EnterRecursiveCall
(
" while encoding a JSON object"
))
return
-
1
;
rv
=
encoder_listencode_list
(
s
,
rval
,
obj
,
indent_level
);
rv
=
encoder_listencode_list
(
s
,
acc
,
obj
,
indent_level
);
Py_LeaveRecursiveCall
();
return
rv
;
}
else
if
(
PyDict_Check
(
obj
))
{
if
(
Py_EnterRecursiveCall
(
" while encoding a JSON object"
))
return
-
1
;
rv
=
encoder_listencode_dict
(
s
,
rval
,
obj
,
indent_level
);
rv
=
encoder_listencode_dict
(
s
,
acc
,
obj
,
indent_level
);
Py_LeaveRecursiveCall
();
return
rv
;
}
...
...
@@ -1394,7 +1497,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
if
(
Py_EnterRecursiveCall
(
" while encoding a JSON object"
))
return
-
1
;
rv
=
encoder_listencode_obj
(
s
,
rval
,
newobj
,
indent_level
);
rv
=
encoder_listencode_obj
(
s
,
acc
,
newobj
,
indent_level
);
Py_LeaveRecursiveCall
();
Py_DECREF
(
newobj
);
...
...
@@ -1414,9 +1517,10 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi
}
static
int
encoder_listencode_dict
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
dct
,
Py_ssize_t
indent_level
)
encoder_listencode_dict
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
dct
,
Py_ssize_t
indent_level
)
{
/* Encode Python dict dct a JSON term
, rval is a PyList
*/
/* Encode Python dict dct a JSON term */
static
PyObject
*
open_dict
=
NULL
;
static
PyObject
*
close_dict
=
NULL
;
static
PyObject
*
empty_dict
=
NULL
;
...
...
@@ -1436,7 +1540,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
return
-
1
;
}
if
(
Py_SIZE
(
dct
)
==
0
)
return
PyList_Append
(
rval
,
empty_dict
);
return
accumulate_unicode
(
acc
,
empty_dict
);
if
(
s
->
markers
!=
Py_None
)
{
int
has_key
;
...
...
@@ -1454,7 +1558,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
}
}
if
(
PyList_Append
(
rval
,
open_dict
))
if
(
accumulate_unicode
(
acc
,
open_dict
))
goto
bail
;
if
(
s
->
indent
!=
Py_None
)
{
...
...
@@ -1541,7 +1645,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
}
if
(
idx
)
{
if
(
PyList_Append
(
rval
,
s
->
item_separator
))
if
(
accumulate_unicode
(
acc
,
s
->
item_separator
))
goto
bail
;
}
...
...
@@ -1549,16 +1653,16 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
Py_CLEAR
(
kstr
);
if
(
encoded
==
NULL
)
goto
bail
;
if
(
PyList_Append
(
rval
,
encoded
))
{
if
(
accumulate_unicode
(
acc
,
encoded
))
{
Py_DECREF
(
encoded
);
goto
bail
;
}
Py_DECREF
(
encoded
);
if
(
PyList_Append
(
rval
,
s
->
key_separator
))
if
(
accumulate_unicode
(
acc
,
s
->
key_separator
))
goto
bail
;
value
=
PyTuple_GET_ITEM
(
item
,
1
);
if
(
encoder_listencode_obj
(
s
,
rval
,
value
,
indent_level
))
if
(
encoder_listencode_obj
(
s
,
acc
,
value
,
indent_level
))
goto
bail
;
idx
+=
1
;
Py_DECREF
(
item
);
...
...
@@ -1578,7 +1682,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if
(
PyList_Append
(
rval
,
close_dict
))
if
(
accumulate_unicode
(
acc
,
close_dict
))
goto
bail
;
return
0
;
...
...
@@ -1592,9 +1696,10 @@ bail:
static
int
encoder_listencode_list
(
PyEncoderObject
*
s
,
PyObject
*
rval
,
PyObject
*
seq
,
Py_ssize_t
indent_level
)
encoder_listencode_list
(
PyEncoderObject
*
s
,
accumulator
*
acc
,
PyObject
*
seq
,
Py_ssize_t
indent_level
)
{
/* Encode Python list seq to a JSON term
, rval is a PyList
*/
/* Encode Python list seq to a JSON term */
static
PyObject
*
open_array
=
NULL
;
static
PyObject
*
close_array
=
NULL
;
static
PyObject
*
empty_array
=
NULL
;
...
...
@@ -1618,7 +1723,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
num_items
=
PySequence_Fast_GET_SIZE
(
s_fast
);
if
(
num_items
==
0
)
{
Py_DECREF
(
s_fast
);
return
PyList_Append
(
rval
,
empty_array
);
return
accumulate_unicode
(
acc
,
empty_array
);
}
if
(
s
->
markers
!=
Py_None
)
{
...
...
@@ -1638,7 +1743,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
}
seq_items
=
PySequence_Fast_ITEMS
(
s_fast
);
if
(
PyList_Append
(
rval
,
open_array
))
if
(
accumulate_unicode
(
acc
,
open_array
))
goto
bail
;
if
(
s
->
indent
!=
Py_None
)
{
/* TODO: DOES NOT RUN */
...
...
@@ -1652,10 +1757,10 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
for
(
i
=
0
;
i
<
num_items
;
i
++
)
{
PyObject
*
obj
=
seq_items
[
i
];
if
(
i
)
{
if
(
PyList_Append
(
rval
,
s
->
item_separator
))
if
(
accumulate_unicode
(
acc
,
s
->
item_separator
))
goto
bail
;
}
if
(
encoder_listencode_obj
(
s
,
rval
,
obj
,
indent_level
))
if
(
encoder_listencode_obj
(
s
,
acc
,
obj
,
indent_level
))
goto
bail
;
}
if
(
ident
!=
NULL
)
{
...
...
@@ -1670,7 +1775,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss
yield '\n' + (' ' * (_indent * _current_indent_level))
}*/
if
(
PyList_Append
(
rval
,
close_array
))
if
(
accumulate_unicode
(
acc
,
close_array
))
goto
bail
;
Py_DECREF
(
s_fast
);
return
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment