Commit a46b2a59 authored by Stefan Behnel's avatar Stefan Behnel

move string related utility functions from Optimize.c and Optimize.py to StringTools.c

parent 04160be8
......@@ -1788,13 +1788,14 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
node = ExprNodes.PythonCapiCallNode(
coerce_node.pos, "__Pyx_PyBytes_GetItemInt",
self.PyBytes_GetItemInt_func_type,
args = [
args=[
arg.base.as_none_safe_node("'NoneType' object is not subscriptable"),
index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
bound_check_node,
],
is_temp = True,
utility_code=load_c_utility('bytes_index'))
is_temp=True,
utility_code=UtilityCode.load_cached(
'bytes_index', 'StringTools.c'))
if coerce_node.type is not PyrexTypes.c_char_type:
node = node.coerce_to(coerce_node.type, env)
return node
......@@ -2342,7 +2343,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
method_name = node.function.attribute
if method_name == 'istitle':
# istitle() doesn't directly map to Py_UNICODE_ISTITLE()
utility_code = load_c_utility("py_unicode_istitle")
utility_code = UtilityCode.load_cached(
"py_unicode_istitle", "StringTools.c")
function_name = '__Pyx_Py_UNICODE_ISTITLE'
else:
utility_code = None
......@@ -2898,31 +2900,9 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env())
unicode_tailmatch_utility_code = load_c_utility('unicode_tailmatch')
bytes_tailmatch_utility_code = load_c_utility('bytes_tailmatch')
str_tailmatch_utility_code = UtilityCode(
proto = '''
static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction);
''',
# We do not use a C compiler macro here to avoid "unused function"
# warnings for the *_Tailmatch() function that is not being used in
# the specific CPython version. The C compiler will generate the same
# code anyway, and will usually just remove the unused function.
impl = '''
static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction)
{
if (PY_MAJOR_VERSION < 3)
return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
else
return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
}
''',
requires=[unicode_tailmatch_utility_code, bytes_tailmatch_utility_code]
)
unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c')
bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c')
str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c')
tpnew_utility_code = UtilityCode(
......
......@@ -112,163 +112,6 @@ static PyObject* __Pyx_PyObject_PopIndex(PyObject* L, Py_ssize_t ix) {
}
/////////////// py_unicode_istitle.proto ///////////////
// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
// additionally allows character that comply with Py_UNICODE_ISUPPER()
#if PY_VERSION_HEX < 0x030200A2
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
#else
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
#endif
{
return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
}
/////////////// unicode_tailmatch.proto ///////////////
// Python's unicode.startswith() and unicode.endswith() support a
// tuple of prefixes/suffixes, whereas it's much more common to
// test for a single unicode string.
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
Py_ssize_t start, Py_ssize_t end, int direction) {
if (unlikely(PyTuple_Check(substr))) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
for (i = 0; i < count; i++) {
int result;
#if CYTHON_COMPILING_IN_CPYTHON
result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
start, end, direction);
#else
PyObject* sub = PySequence_GetItem(substr, i);
if (unlikely(!sub)) return -1;
result = PyUnicode_Tailmatch(s, sub, start, end, direction);
Py_DECREF(sub);
#endif
if (result) {
return result;
}
}
return 0;
}
return PyUnicode_Tailmatch(s, substr, start, end, direction);
}
/////////////// bytes_tailmatch.proto ///////////////
static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction)
{
const char* self_ptr = PyBytes_AS_STRING(self);
Py_ssize_t self_len = PyBytes_GET_SIZE(self);
const char* sub_ptr;
Py_ssize_t sub_len;
int retval;
#if PY_VERSION_HEX >= 0x02060000
Py_buffer view;
view.obj = NULL;
#endif
if ( PyBytes_Check(arg) ) {
sub_ptr = PyBytes_AS_STRING(arg);
sub_len = PyBytes_GET_SIZE(arg);
}
#if PY_MAJOR_VERSION < 3
// Python 2.x allows mixing unicode and str
else if ( PyUnicode_Check(arg) ) {
return PyUnicode_Tailmatch(self, arg, start, end, direction);
}
#endif
else {
#if PY_VERSION_HEX < 0x02060000
if (unlikely(PyObject_AsCharBuffer(arg, &sub_ptr, &sub_len)))
return -1;
#else
if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
return -1;
sub_ptr = (const char*) view.buf;
sub_len = view.len;
#endif
}
if (end > self_len)
end = self_len;
else if (end < 0)
end += self_len;
if (end < 0)
end = 0;
if (start < 0)
start += self_len;
if (start < 0)
start = 0;
if (direction > 0) {
/* endswith */
if (end-sub_len > start)
start = end - sub_len;
}
if (start + sub_len <= end)
retval = !memcmp(self_ptr+start, sub_ptr, sub_len);
else
retval = 0;
#if PY_VERSION_HEX >= 0x02060000
if (view.obj)
PyBuffer_Release(&view);
#endif
return retval;
}
static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr, Py_ssize_t start,
Py_ssize_t end, int direction)
{
if (unlikely(PyTuple_Check(substr))) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
for (i = 0; i < count; i++) {
int result;
#if CYTHON_COMPILING_IN_CPYTHON
result = __Pyx_PyBytes_SingleTailmatch(self, PyTuple_GET_ITEM(substr, i),
start, end, direction);
#else
PyObject* sub = PySequence_GetItem(substr, i);
if (unlikely(!sub)) return -1;
result = __Pyx_PyBytes_SingleTailmatch(self, sub, start, end, direction);
Py_DECREF(sub);
#endif
if (result) {
return result;
}
}
return 0;
}
return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
}
/////////////// bytes_index.proto ///////////////
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
if (check_bounds) {
Py_ssize_t size = PyBytes_GET_SIZE(bytes);
if (unlikely(index >= size) | ((index < 0) & unlikely(index < -size))) {
PyErr_Format(PyExc_IndexError, "string index out of range");
return -1;
}
}
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
return PyBytes_AS_STRING(bytes)[index];
}
/////////////// dict_getitem_default.proto ///////////////
static PyObject* __Pyx_PyDict_GetItemDefault(PyObject* d, PyObject* key, PyObject* default_value); /*proto*/
......
......@@ -418,3 +418,183 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}
/////////////// py_unicode_istitle.proto ///////////////
// Py_UNICODE_ISTITLE() doesn't match unicode.istitle() as the latter
// additionally allows character that comply with Py_UNICODE_ISUPPER()
#if PY_VERSION_HEX < 0x030200A2
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UNICODE uchar)
#else
static CYTHON_INLINE int __Pyx_Py_UNICODE_ISTITLE(Py_UCS4 uchar)
#endif
{
return Py_UNICODE_ISTITLE(uchar) || Py_UNICODE_ISUPPER(uchar);
}
/////////////// unicode_tailmatch.proto ///////////////
// Python's unicode.startswith() and unicode.endswith() support a
// tuple of prefixes/suffixes, whereas it's much more common to
// test for a single unicode string.
static int __Pyx_PyUnicode_Tailmatch(PyObject* s, PyObject* substr,
Py_ssize_t start, Py_ssize_t end, int direction) {
if (unlikely(PyTuple_Check(substr))) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
for (i = 0; i < count; i++) {
int result;
#if CYTHON_COMPILING_IN_CPYTHON
result = PyUnicode_Tailmatch(s, PyTuple_GET_ITEM(substr, i),
start, end, direction);
#else
PyObject* sub = PySequence_GetItem(substr, i);
if (unlikely(!sub)) return -1;
result = PyUnicode_Tailmatch(s, sub, start, end, direction);
Py_DECREF(sub);
#endif
if (result) {
return result;
}
}
return 0;
}
return PyUnicode_Tailmatch(s, substr, start, end, direction);
}
/////////////// bytes_tailmatch.proto ///////////////
static int __Pyx_PyBytes_SingleTailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction)
{
const char* self_ptr = PyBytes_AS_STRING(self);
Py_ssize_t self_len = PyBytes_GET_SIZE(self);
const char* sub_ptr;
Py_ssize_t sub_len;
int retval;
#if PY_VERSION_HEX >= 0x02060000
Py_buffer view;
view.obj = NULL;
#endif
if ( PyBytes_Check(arg) ) {
sub_ptr = PyBytes_AS_STRING(arg);
sub_len = PyBytes_GET_SIZE(arg);
}
#if PY_MAJOR_VERSION < 3
// Python 2.x allows mixing unicode and str
else if ( PyUnicode_Check(arg) ) {
return PyUnicode_Tailmatch(self, arg, start, end, direction);
}
#endif
else {
#if PY_VERSION_HEX < 0x02060000
if (unlikely(PyObject_AsCharBuffer(arg, &sub_ptr, &sub_len)))
return -1;
#else
if (unlikely(PyObject_GetBuffer(self, &view, PyBUF_SIMPLE) == -1))
return -1;
sub_ptr = (const char*) view.buf;
sub_len = view.len;
#endif
}
if (end > self_len)
end = self_len;
else if (end < 0)
end += self_len;
if (end < 0)
end = 0;
if (start < 0)
start += self_len;
if (start < 0)
start = 0;
if (direction > 0) {
/* endswith */
if (end-sub_len > start)
start = end - sub_len;
}
if (start + sub_len <= end)
retval = !memcmp(self_ptr+start, sub_ptr, sub_len);
else
retval = 0;
#if PY_VERSION_HEX >= 0x02060000
if (view.obj)
PyBuffer_Release(&view);
#endif
return retval;
}
static int __Pyx_PyBytes_Tailmatch(PyObject* self, PyObject* substr, Py_ssize_t start,
Py_ssize_t end, int direction)
{
if (unlikely(PyTuple_Check(substr))) {
Py_ssize_t i, count = PyTuple_GET_SIZE(substr);
for (i = 0; i < count; i++) {
int result;
#if CYTHON_COMPILING_IN_CPYTHON
result = __Pyx_PyBytes_SingleTailmatch(self, PyTuple_GET_ITEM(substr, i),
start, end, direction);
#else
PyObject* sub = PySequence_GetItem(substr, i);
if (unlikely(!sub)) return -1;
result = __Pyx_PyBytes_SingleTailmatch(self, sub, start, end, direction);
Py_DECREF(sub);
#endif
if (result) {
return result;
}
}
return 0;
}
return __Pyx_PyBytes_SingleTailmatch(self, substr, start, end, direction);
}
/////////////// str_tailmatch.proto ///////////////
static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction);
/////////////// str_tailmatch ///////////////
//@requires: bytes_tailmatch
//@requires: unicode_tailmatch
static CYTHON_INLINE int __Pyx_PyStr_Tailmatch(PyObject* self, PyObject* arg, Py_ssize_t start,
Py_ssize_t end, int direction)
{
// We do not use a C compiler macro here to avoid "unused function"
// warnings for the *_Tailmatch() function that is not being used in
// the specific CPython version. The C compiler will generate the same
// code anyway, and will usually just remove the unused function.
if (PY_MAJOR_VERSION < 3)
return __Pyx_PyBytes_Tailmatch(self, arg, start, end, direction);
else
return __Pyx_PyUnicode_Tailmatch(self, arg, start, end, direction);
}
/////////////// bytes_index.proto ///////////////
static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t index, int check_bounds) {
if (check_bounds) {
Py_ssize_t size = PyBytes_GET_SIZE(bytes);
if (unlikely(index >= size) | ((index < 0) & unlikely(index < -size))) {
PyErr_Format(PyExc_IndexError, "string index out of range");
return -1;
}
}
if (index < 0)
index += PyBytes_GET_SIZE(bytes);
return PyBytes_AS_STRING(bytes)[index];
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment