Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
de20b0b5
Commit
de20b0b5
authored
Nov 10, 2011
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #13149: Speed up append-only StringIO objects.
This is very similar to the "lazy strings" idea.
parent
9f4b1e9c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
108 additions
and
5 deletions
+108
-5
Misc/NEWS
Misc/NEWS
+2
-0
Modules/_io/stringio.c
Modules/_io/stringio.c
+105
-4
Objects/unicodeobject.c
Objects/unicodeobject.c
+1
-1
No files found.
Misc/NEWS
View file @
de20b0b5
...
...
@@ -365,6 +365,8 @@ Core and Builtins
Library
-------
-
Issue
#
13149
:
Speed
up
append
-
only
StringIO
objects
.
-
Issue
#
13373
:
multiprocessing
.
Queue
.
get
()
could
sometimes
block
indefinitely
when
called
with
a
timeout
.
Patch
by
Arnaud
Ysmal
.
...
...
Modules/_io/stringio.c
View file @
de20b0b5
...
...
@@ -7,6 +7,9 @@
than the enclosed string, for proper functioning of _PyIO_find_line_ending.
*/
#define STATE_REALIZED 1
#define STATE_ACCUMULATING 2
typedef
struct
{
PyObject_HEAD
Py_UCS4
*
buf
;
...
...
@@ -14,6 +17,15 @@ typedef struct {
Py_ssize_t
string_size
;
size_t
buf_size
;
/* The stringio object can be in two states: accumulating or realized.
In accumulating state, the internal buffer contains nothing and
the contents are given by the embedded _PyAccu structure.
In realized state, the internal buffer is meaningful and the
_PyAccu is destroyed.
*/
int
state
;
_PyAccu
accu
;
char
ok
;
/* initialized? */
char
closed
;
char
readuniversal
;
...
...
@@ -40,6 +52,11 @@ typedef struct {
return NULL; \
}
#define ENSURE_REALIZED(self) \
if (realize(self) < 0) { \
return NULL; \
}
PyDoc_STRVAR
(
stringio_doc
,
"Text I/O implementation using an in-memory buffer.
\n
"
"
\n
"
...
...
@@ -102,6 +119,54 @@ resize_buffer(stringio *self, size_t size)
return
-
1
;
}
static
PyObject
*
make_intermediate
(
stringio
*
self
)
{
PyObject
*
intermediate
=
_PyAccu_Finish
(
&
self
->
accu
);
self
->
state
=
STATE_REALIZED
;
if
(
intermediate
==
NULL
)
return
NULL
;
if
(
_PyAccu_Init
(
&
self
->
accu
)
||
_PyAccu_Accumulate
(
&
self
->
accu
,
intermediate
))
{
Py_DECREF
(
intermediate
);
return
NULL
;
}
self
->
state
=
STATE_ACCUMULATING
;
return
intermediate
;
}
static
int
realize
(
stringio
*
self
)
{
Py_ssize_t
len
;
PyObject
*
intermediate
;
if
(
self
->
state
==
STATE_REALIZED
)
return
0
;
assert
(
self
->
state
==
STATE_ACCUMULATING
);
self
->
state
=
STATE_REALIZED
;
intermediate
=
_PyAccu_Finish
(
&
self
->
accu
);
if
(
intermediate
==
NULL
)
return
-
1
;
/* Append the intermediate string to the internal buffer.
The length should be equal to the current cursor position.
*/
len
=
PyUnicode_GET_LENGTH
(
intermediate
);
if
(
resize_buffer
(
self
,
len
)
<
0
)
{
Py_DECREF
(
intermediate
);
return
-
1
;
}
if
(
!
PyUnicode_AsUCS4
(
intermediate
,
self
->
buf
,
len
,
0
))
{
Py_DECREF
(
intermediate
);
return
-
1
;
}
Py_DECREF
(
intermediate
);
return
0
;
}
/* Internal routine for writing a whole PyUnicode object to the buffer of a
StringIO object. Returns 0 on success, or -1 on error. */
static
Py_ssize_t
...
...
@@ -136,7 +201,6 @@ write_str(stringio *self, PyObject *obj)
return
-
1
;
}
len
=
PyUnicode_GET_LENGTH
(
decoded
);
assert
(
len
>=
0
);
/* This overflow check is not strictly necessary. However, it avoids us to
...
...
@@ -147,6 +211,17 @@ write_str(stringio *self, PyObject *obj)
"new position too large"
);
goto
fail
;
}
if
(
self
->
state
==
STATE_ACCUMULATING
)
{
if
(
self
->
string_size
==
self
->
pos
)
{
if
(
_PyAccu_Accumulate
(
&
self
->
accu
,
decoded
))
goto
fail
;
goto
success
;
}
if
(
realize
(
self
))
goto
fail
;
}
if
(
self
->
pos
+
len
>
self
->
string_size
)
{
if
(
resize_buffer
(
self
,
self
->
pos
+
len
)
<
0
)
goto
fail
;
...
...
@@ -174,6 +249,7 @@ write_str(stringio *self, PyObject *obj)
0
))
goto
fail
;
success:
/* Set the new length of the internal string if it has changed. */
self
->
pos
+=
len
;
if
(
self
->
string_size
<
self
->
pos
)
...
...
@@ -195,6 +271,8 @@ stringio_getvalue(stringio *self)
{
CHECK_INITIALIZED
(
self
);
CHECK_CLOSED
(
self
);
if
(
self
->
state
==
STATE_ACCUMULATING
)
return
make_intermediate
(
self
);
return
PyUnicode_FromKindAndData
(
PyUnicode_4BYTE_KIND
,
self
->
buf
,
self
->
string_size
);
}
...
...
@@ -251,6 +329,14 @@ stringio_read(stringio *self, PyObject *args)
size
=
0
;
}
/* Optimization for seek(0); read() */
if
(
self
->
state
==
STATE_ACCUMULATING
&&
self
->
pos
==
0
&&
size
==
n
)
{
PyObject
*
result
=
make_intermediate
(
self
);
self
->
pos
=
self
->
string_size
;
return
result
;
}
ENSURE_REALIZED
(
self
);
output
=
self
->
buf
+
self
->
pos
;
self
->
pos
+=
size
;
return
PyUnicode_FromKindAndData
(
PyUnicode_4BYTE_KIND
,
output
,
size
);
...
...
@@ -301,6 +387,7 @@ stringio_readline(stringio *self, PyObject *args)
if
(
!
PyArg_ParseTuple
(
args
,
"|O:readline"
,
&
arg
))
return
NULL
;
CHECK_CLOSED
(
self
);
ENSURE_REALIZED
(
self
);
if
(
PyNumber_Check
(
arg
))
{
limit
=
PyNumber_AsSsize_t
(
arg
,
PyExc_OverflowError
);
...
...
@@ -322,6 +409,7 @@ stringio_iternext(stringio *self)
CHECK_INITIALIZED
(
self
);
CHECK_CLOSED
(
self
);
ENSURE_REALIZED
(
self
);
if
(
Py_TYPE
(
self
)
==
&
PyStringIO_Type
)
{
/* Skip method call overhead for speed */
...
...
@@ -392,6 +480,7 @@ stringio_truncate(stringio *self, PyObject *args)
}
if
(
size
<
self
->
string_size
)
{
ENSURE_REALIZED
(
self
);
if
(
resize_buffer
(
self
,
size
)
<
0
)
return
NULL
;
self
->
string_size
=
size
;
...
...
@@ -492,6 +581,7 @@ stringio_close(stringio *self)
/* Free up some memory */
if
(
resize_buffer
(
self
,
0
)
<
0
)
return
NULL
;
_PyAccu_Destroy
(
&
self
->
accu
);
Py_CLEAR
(
self
->
readnl
);
Py_CLEAR
(
self
->
writenl
);
Py_CLEAR
(
self
->
decoder
);
...
...
@@ -521,6 +611,7 @@ stringio_dealloc(stringio *self)
PyMem_Free
(
self
->
buf
);
self
->
buf
=
NULL
;
}
_PyAccu_Destroy
(
&
self
->
accu
);
Py_CLEAR
(
self
->
readnl
);
Py_CLEAR
(
self
->
writenl
);
Py_CLEAR
(
self
->
decoder
);
...
...
@@ -559,6 +650,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
PyObject
*
value
=
NULL
;
PyObject
*
newline_obj
=
NULL
;
char
*
newline
=
"
\n
"
;
Py_ssize_t
value_len
;
if
(
!
PyArg_ParseTupleAndKeywords
(
args
,
kwds
,
"|OO:__init__"
,
kwlist
,
&
value
,
&
newline_obj
))
...
...
@@ -600,6 +692,7 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
self
->
ok
=
0
;
_PyAccu_Destroy
(
&
self
->
accu
);
Py_CLEAR
(
self
->
readnl
);
Py_CLEAR
(
self
->
writenl
);
Py_CLEAR
(
self
->
decoder
);
...
...
@@ -636,19 +729,27 @@ stringio_init(stringio *self, PyObject *args, PyObject *kwds)
/* Now everything is set up, resize buffer to size of initial value,
and copy it */
self
->
string_size
=
0
;
if
(
value
&&
value
!=
Py_None
)
{
Py_ssize_t
len
=
PyUnicode_GetSize
(
value
);
if
(
value
&&
value
!=
Py_None
)
value_len
=
PyUnicode_GetSize
(
value
);
else
value_len
=
0
;
if
(
value_len
>
0
)
{
/* This is a heuristic, for newline translation might change
the string length. */
if
(
resize_buffer
(
self
,
len
)
<
0
)
if
(
resize_buffer
(
self
,
0
)
<
0
)
return
-
1
;
self
->
state
=
STATE_REALIZED
;
self
->
pos
=
0
;
if
(
write_str
(
self
,
value
)
<
0
)
return
-
1
;
}
else
{
/* Empty stringio object, we can start by accumulating */
if
(
resize_buffer
(
self
,
0
)
<
0
)
return
-
1
;
if
(
_PyAccu_Init
(
&
self
->
accu
))
return
-
1
;
self
->
state
=
STATE_ACCUMULATING
;
}
self
->
pos
=
0
;
...
...
Objects/unicodeobject.c
View file @
de20b0b5
...
...
@@ -2055,7 +2055,7 @@ Py_UCS4*
PyUnicode_AsUCS4
(
PyObject
*
string
,
Py_UCS4
*
target
,
Py_ssize_t
targetsize
,
int
copy_null
)
{
if
(
target
==
NULL
||
targetsize
<
1
)
{
if
(
target
==
NULL
||
targetsize
<
0
)
{
PyErr_BadInternalCall
();
return
NULL
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment