Commit fa2620f9 authored by zaur

Add unicode slicing support

parent 2d75b4bf
...@@ -3457,6 +3457,8 @@ class SliceIndexNode(ExprNode): ...@@ -3457,6 +3457,8 @@ class SliceIndexNode(ExprNode):
base_type = self.base.type base_type = self.base.type
if base_type.is_string or base_type.is_cpp_string: if base_type.is_string or base_type.is_cpp_string:
self.type = bytes_type self.type = bytes_type
elif base_type is unicode_type:
self.type = unicode_type
elif base_type.is_ptr: elif base_type.is_ptr:
self.type = base_type self.type = base_type
elif base_type.is_array: elif base_type.is_array:
...@@ -3506,6 +3508,39 @@ class SliceIndexNode(ExprNode): ...@@ -3506,6 +3508,39 @@ class SliceIndexNode(ExprNode):
self.stop_code(), self.stop_code(),
self.start_code(), self.start_code(),
code.error_goto_if_null(self.result(), self.pos))) code.error_goto_if_null(self.result(), self.pos)))
elif self.base.type is unicode_type:
base_result = self.base.result()
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
if self.start is None:
if self.stop is None:
code.putln(
"%s = __Pyx_PyUnicode_Substring(%s, 0, PY_SSIZE_T_MAX); %s" % (
self.result(),
base_result,
code.error_goto_if_null(self.result(), self.pos)))
else:
code.putln(
"%s = __Pyx_PyUnicode_Substring(%s, 0, %s); %s" % (
self.result(),
base_result,
self.stop_code(),
code.error_goto_if_null(self.result(), self.pos)))
elif self.stop is None:
code.putln(
"%s = __Pyx_PyUnicode_Substring(%s, %s, PY_SSIZE_T_MAX); %s" % (
self.result(),
base_result,
self.start_code(),
code.error_goto_if_null(self.result(), self.pos)))
else:
code.putln(
"%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
self.result(),
base_result,
self.start_code(),
self.stop_code(),
code.error_goto_if_null(self.result(), self.pos)))
else: else:
code.putln( code.putln(
"%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s" % ( "%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s" % (
...@@ -10403,3 +10438,6 @@ proto=""" ...@@ -10403,3 +10438,6 @@ proto="""
#define UNARY_NEG_WOULD_OVERFLOW(x) \ #define UNARY_NEG_WOULD_OVERFLOW(x) \
(((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x))) (((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x)))
""") """)
pyunicode_substring = UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c")
...@@ -374,3 +374,42 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes( ...@@ -374,3 +374,42 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
return PyUnicode_Decode(cstring, length, encoding, errors); return PyUnicode_Decode(cstring, length, encoding, errors);
} }
} }
/////////////// PyUnicode_Substring.proto ///////////////
/*
 * Slice helper for unicode objects: produces text[start:stop].
 *
 * Negative start/stop are offset by the string length and out-of-range
 * bounds are clamped by the implementation; an empty range yields an
 * empty unicode object.  Generated code tests the result for NULL.
 */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
PyObject* text, Py_ssize_t start, Py_ssize_t stop);
/////////////// PyUnicode_Substring ///////////////
/*
 * __Pyx_PyUnicode_SUBSTRING(text, start, stop)
 *
 * Builds a new unicode object from the characters [start, stop) of `text`.
 * No bounds checking is done here: the caller must pass 0 <= start <= stop
 * <= length.  Every argument is parenthesized so that compound expressions
 * (e.g. `a + b`) expand correctly.
 */
#if CYTHON_PEP393_ENABLED
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromKindAndData(PyUnicode_KIND(text), \
        PyUnicode_1BYTE_DATA(text) + (start) * PyUnicode_KIND(text), \
        (stop) - (start))
#else
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + (start), (stop) - (start))
#endif

/*
 * Returns text[start:stop] as a new unicode object.
 *
 * text  - the unicode object to slice.
 * start - first index of the slice; a negative value is offset by the
 *         length and then clamped to 0.
 * stop  - end index of the slice (exclusive); a negative value is offset by
 *         the length, a value past the end is clamped to the length.
 *
 * An empty or inverted range produces an empty unicode object.  Returns
 * NULL if the string cannot be brought into its canonical representation
 * (PEP 393 builds) or if creating the result object fails.
 */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
        PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    /* The length/kind/data accessors are only valid once the string is in
       its canonical ("ready") form, so make sure of that first. */
    if (PyUnicode_READY(text) == -1) {
        return NULL;
    }
    length = PyUnicode_GET_LENGTH(text);
#else
    length = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += length;
        if (start < 0) {
            start = 0;
        }
    }
    if (stop < 0) {
        stop += length;
    }
    if (stop > length) {
        stop = length;
    }
    /* Also covers start > length and a still-negative stop: both give a
       non-positive span and therefore the empty string. */
    length = stop - start;
    if (length <= 0) {
        return PyUnicode_FromUnicode(NULL, 0);
    }
    return (PyObject*)__Pyx_PyUnicode_SUBSTRING(text, start, stop);
}
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment