Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
P
Pyston
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
Pyston
Commits
bbc95869
Commit
bbc95869
authored
Jul 06, 2015
by
Kevin Modzelewski
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #673 from kmod/perf
Unicode-creation optimizations
parents
a60a1b9a
4ab66c61
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
82 additions
and
84 deletions
+82
-84
from_cpython/Include/stringobject.h
from_cpython/Include/stringobject.h
+1
-3
from_cpython/Objects/unicodeobject.c
from_cpython/Objects/unicodeobject.c
+2
-71
microbenchmarks/re_finditer_bench.py
microbenchmarks/re_finditer_bench.py
+7
-0
microbenchmarks/unicode_split_ubench.py
microbenchmarks/unicode_split_ubench.py
+6
-0
src/codegen/entry.cpp
src/codegen/entry.cpp
+1
-0
src/runtime/str.cpp
src/runtime/str.cpp
+0
-10
src/runtime/types.cpp
src/runtime/types.cpp
+65
-0
No files found.
from_cpython/Include/stringobject.h
View file @
bbc95869
...
...
@@ -112,10 +112,8 @@ PyAPI_FUNC(int) _PyString_CheckInterned(PyObject *) PYSTON_NOEXCEPT;
// length of the unicode string, not the length of the bytes it encodes to in the default
// encoding.
// So, set up a different function for those callers to use.
PyAPI_FUNC
(
Py_ssize_t
)
_PyString_SizeMacro
(
PyObject
*
)
PYSTON_NOEXCEPT
;
#define PyString_GET_SIZE(op) _PyString_SizeMacro((PyObject*)op)
//#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
//
#define PyString_GET_SIZE(op) Py_SIZE(op)
#define PyString_GET_SIZE(op) Py_SIZE(op)
/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*,
x must be an iterable object. */
...
...
from_cpython/Objects/unicodeobject.c
View file @
bbc95869
...
...
@@ -101,7 +101,7 @@ static PyUnicodeObject *free_list = NULL;
static
int
numfree
=
0
;
/* The empty Unicode object is shared to improve performance. */
static
PyUnicodeObject
*
unicode_empty
=
NULL
;
PyUnicodeObject
*
unicode_empty
=
NULL
;
#define _Py_RETURN_UNICODE_EMPTY() \
do { \
...
...
@@ -317,76 +317,7 @@ int unicode_resize(register PyUnicodeObject *unicode,
*/
/* Create a unicode object with room for `length` code units plus a
 * terminating zero.
 *
 * Returns the shared empty-string object (with an extra reference) when
 * `length` is 0 and that object already exists.  Returns NULL with a
 * memory error set when the requested size would overflow or when the
 * character buffer cannot be allocated.  Objects parked on `free_list`
 * are recycled before a fresh object is allocated.
 */
static PyUnicodeObject *
_PyUnicode_New(Py_ssize_t length)
{
    register PyUnicodeObject *unicode;
    size_t buffer_bytes;

    /* Fast path: hand out the shared empty string. */
    if (length == 0 && unicode_empty != NULL) {
        Py_INCREF(unicode_empty);
        return unicode_empty;
    }

    /* Refuse lengths whose byte size could not be represented. */
    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
        return (PyUnicodeObject *)PyErr_NoMemory();
    }

    /* Bytes needed for `length` code units plus the terminator. */
    buffer_bytes = sizeof(Py_UNICODE) * ((size_t)length + 1);

    if (!free_list) {
        /* Nothing to recycle: allocate a brand-new object and buffer. */
        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
        if (unicode == NULL)
            return NULL;
        unicode->str = (Py_UNICODE *)PyObject_MALLOC(buffer_bytes);
    }
    else {
        /* Pop the head of the free list; the first word of a parked
           object holds the link to the next one. */
        unicode = free_list;
        free_list = *(PyUnicodeObject **)unicode;
        numfree--;

        if (!unicode->str) {
            unicode->str = (Py_UNICODE *)PyObject_MALLOC(buffer_bytes);
        }
        else if ((unicode->length < length) &&
                 unicode_resize(unicode, length) < 0) {
            /* Keep-alive buffers are only ever grown, never shrunk.
               Growing failed, so drop the buffer; the NULL check below
               reports the error. */
            PyObject_DEL(unicode->str);
            unicode->str = NULL;
        }
        PyObject_INIT(unicode, &PyUnicode_Type);
    }

    if (!unicode->str) {
        PyErr_NoMemory();
        /* XXX UNREF/NEWREF interface should be more symmetrical */
        _Py_DEC_REFTOTAL;
        _Py_ForgetReference((PyObject *)unicode);
        PyObject_Del(unicode);
        return NULL;
    }

    /* Zero the first element as well as the terminator: unicode_resize()
     * reads str[0], and a kept-alive buffer may survive a call to
     * unicode_dealloc(unicode) even if the caller failed before filling
     * it in.  This keeps unicode_resize() from reading uninitialized
     * memory in that case.
     */
    unicode->str[0] = 0;
    unicode->str[length] = 0;
    unicode->length = length;
    unicode->hash = -1;
    unicode->defenc = NULL;
    return unicode;
}
extern
PyUnicodeObject
*
_PyUnicode_New
(
Py_ssize_t
length
);
static
void
unicode_dealloc
(
register
PyUnicodeObject
*
unicode
)
...
...
microbenchmarks/re_finditer_bench.py
0 → 100644
View file @
bbc95869
import re


def f():
    """Microbenchmark: call finditer() on a short string two million times.

    The iterators returned by finditer() are discarded without being
    consumed, so only the cost of creating them is exercised.
    """
    pattern = re.compile(" ")
    text = "a b c d"
    for _ in xrange(2000000):
        pattern.finditer(text)


f()
microbenchmarks/unicode_split_ubench.py
0 → 100644
View file @
bbc95869
def f():
    """Microbenchmark: split a byte string on a unicode separator four million times.

    The result of each split() call is discarded.
    """
    text = "a b c d"
    separator = u" "
    for _ in xrange(4000000):
        text.split(separator)


f()
src/codegen/entry.cpp
View file @
bbc95869
...
...
@@ -364,6 +364,7 @@ static void handle_sigprof(int signum) {
//#define INVESTIGATE_STAT_TIMER "us_timer_in_jitted_code"
#ifdef INVESTIGATE_STAT_TIMER
static_assert
(
STAT_TIMERS
,
"Stat timers need to be enabled to investigate them"
);
static
uint64_t
*
stat_counter
=
Stats
::
getStatCounter
(
INVESTIGATE_STAT_TIMER
);
static
void
handle_sigprof_investigate_stattimer
(
int
signum
)
{
if
(
StatTimer
::
getCurrentCounter
()
==
stat_counter
)
...
...
src/runtime/str.cpp
View file @
bbc95869
...
...
@@ -2348,16 +2348,6 @@ extern "C" Py_ssize_t PyString_Size(PyObject* op) noexcept {
return
len
;
}
// Backing function for the PyString_GET_SIZE macro.
//
// For str objects this returns BoxedString::size(); for unicode objects it
// returns Py_SIZE(op).  Any other object type trips a RELEASE_ASSERT that
// names the offending type.
extern "C" Py_ssize_t _PyString_SizeMacro(PyObject* op) noexcept {
    if (PyString_Check(op)) {
        BoxedString* boxed = static_cast<BoxedString*>(op);
        return boxed->size();
    } else if (PyUnicode_Check(op)) {
        return Py_SIZE(op);
    }

    RELEASE_ASSERT(0, "Need to verify the behavior of PyString_GET_SIZE on %s objects", op->cls->tp_name);
}
extern
"C"
int
_PyString_Resize
(
PyObject
**
pv
,
Py_ssize_t
newsize
)
noexcept
{
// This is only allowed to be called when there is only one user of the string (ie a refcount of 1 in CPython)
...
...
src/runtime/types.cpp
View file @
bbc95869
...
...
@@ -2850,6 +2850,66 @@ out:
return
result
;
}
// GC visit function for unicode objects: runs the generic box handler, then
// reports the two pointers a PyUnicodeObject holds (its character buffer
// `str` and its `defenc` member) to the visitor.
void unicode_visit(GCVisitor* v, Box* b) {
    boxGCHandler(v, b);

    PyUnicodeObject* as_unicode = (PyUnicodeObject*)b;
    v->visit(as_unicode->str);
    v->visit(as_unicode->defenc);
}
extern
"C"
PyUnicodeObject
*
unicode_empty
;
// Create a unicode object with room for `length` code units plus a
// terminating zero, allocating both the object and its buffer with gc_alloc.
//
// Returns the shared empty-string object (with an extra reference) when
// `length` is 0 and that object already exists.  Returns NULL via
// PyErr_NoMemory() when the size would overflow or either allocation fails.
extern "C" PyUnicodeObject* _PyUnicode_New(Py_ssize_t length) noexcept {
    /* Fast path: hand out the shared empty string. */
    if (length == 0 && unicode_empty != NULL) {
        Py_INCREF(unicode_empty);
        return unicode_empty;
    }

    /* Refuse lengths whose byte size could not be represented. */
    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1))
        return (PyUnicodeObject*)PyErr_NoMemory();

    // Pyston change: the character buffer is allocated before the object
    // itself, so that a collection triggered by an allocation never sees a
    // half-created unicode object.
    size_t buffer_bytes = sizeof(Py_UNICODE) * ((size_t)length + 1);
    Py_UNICODE* buffer = (Py_UNICODE*)gc_alloc(buffer_bytes, gc::GCKind::UNTRACKED);
    if (!buffer)
        return (PyUnicodeObject*)PyErr_NoMemory();

    // Hand-inlined, constant-folded form of CPython's
    //     unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
    // The assert justifies using the compile-time size below.
    assert(PyUnicode_Type.tp_basicsize == sizeof(PyUnicodeObject));
    PyUnicodeObject* result = (PyUnicodeObject*)gc_alloc(sizeof(PyUnicodeObject), gc::GCKind::PYTHON);
    if (result == NULL)
        return (PyUnicodeObject*)PyErr_NoMemory();

    // Hand-inlined PyObject_INIT; the asserts document why setting ob_type
    // is the only initialization needed here.
    assert(!PyType_SUPPORTS_WEAKREFS(&PyUnicode_Type));
    assert(!PyUnicode_Type.instancesHaveHCAttrs());
    assert(!PyUnicode_Type.instancesHaveDictAttrs());
    result->ob_type = (struct _typeobject*)&PyUnicode_Type;

    result->str = buffer;

    /* Zero the first element as well as the terminator: unicode_resize()
     * reads str[0], and the Keep-Alive optimization can keep the buffer
     * alive across a call to unicode_dealloc(unicode) even if the caller
     * failed before filling it in.  This keeps unicode_resize() from
     * reading uninitialized memory in that case.
     */
    result->str[0] = 0;
    result->str[length] = 0;
    result->length = length;
    result->hash = -1;
    result->defenc = NULL;

    return result;
}
bool
TRACK_ALLOCATIONS
=
false
;
void
setupRuntime
()
{
...
...
@@ -3349,6 +3409,11 @@ void setupRuntime() {
weakref_callableproxy
->
simple_destructor
=
proxy_to_tp_clear
;
weakref_callableproxy
->
is_pyston_class
=
true
;
unicode_cls
->
tp_alloc
=
PystonType_GenericAlloc
;
unicode_cls
->
gc_visit
=
unicode_visit
;
unicode_cls
->
tp_dealloc
=
NULL
;
unicode_cls
->
is_pyston_class
=
true
;
assert
(
object_cls
->
tp_setattro
==
PyObject_GenericSetAttr
);
assert
(
none_cls
->
tp_setattro
==
PyObject_GenericSetAttr
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment