Commit c6fe84dc authored by scoder's avatar scoder

Merge pull request #186 from intellimath/master

Add unicode slicing support
parents de667b62 7224f33a
...@@ -3470,6 +3470,8 @@ class SliceIndexNode(ExprNode): ...@@ -3470,6 +3470,8 @@ class SliceIndexNode(ExprNode):
if base_type.is_builtin_type: if base_type.is_builtin_type:
# slicing builtin types returns something of the same type # slicing builtin types returns something of the same type
self.type = base_type self.type = base_type
self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
c_int = PyrexTypes.c_py_ssize_t_type c_int = PyrexTypes.c_py_ssize_t_type
if self.start: if self.start:
self.start = self.start.coerce_to(c_int, env) self.start = self.start.coerce_to(c_int, env)
...@@ -3486,6 +3488,11 @@ class SliceIndexNode(ExprNode): ...@@ -3486,6 +3488,11 @@ class SliceIndexNode(ExprNode):
error(self.pos, error(self.pos,
"Slicing is not currently supported for '%s'." % self.type) "Slicing is not currently supported for '%s'." % self.type)
return return
base_result = self.base.result()
result = self.result()
start_code = self.start_code()
stop_code = self.stop_code()
if self.base.type.is_string: if self.base.type.is_string:
base_result = self.base.result() base_result = self.base.result()
if self.base.type != PyrexTypes.c_char_ptr_type: if self.base.type != PyrexTypes.c_char_ptr_type:
...@@ -3493,27 +3500,37 @@ class SliceIndexNode(ExprNode): ...@@ -3493,27 +3500,37 @@ class SliceIndexNode(ExprNode):
if self.stop is None: if self.stop is None:
code.putln( code.putln(
"%s = PyBytes_FromString(%s + %s); %s" % ( "%s = PyBytes_FromString(%s + %s); %s" % (
self.result(), result,
base_result, base_result,
self.start_code(), start_code,
code.error_goto_if_null(self.result(), self.pos))) code.error_goto_if_null(result, self.pos)))
else: else:
code.putln( code.putln(
"%s = PyBytes_FromStringAndSize(%s + %s, %s - %s); %s" % ( "%s = PyBytes_FromStringAndSize(%s + %s, %s - %s); %s" % (
self.result(), self.result(),
base_result, base_result,
self.start_code(), start_code,
self.stop_code(), stop_code,
self.start_code(), start_code,
code.error_goto_if_null(self.result(), self.pos))) code.error_goto_if_null(result, self.pos)))
elif self.base.type is unicode_type:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
code.putln(
"%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
result,
base_result,
start_code,
stop_code,
code.error_goto_if_null(result, self.pos)))
else: else:
code.putln( code.putln(
"%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s" % ( "%s = __Pyx_PySequence_GetSlice(%s, %s, %s); %s" % (
self.result(), result,
self.base.py_result(), self.base.py_result(),
self.start_code(), start_code,
self.stop_code(), stop_code,
code.error_goto_if_null(self.result(), self.pos))) code.error_goto_if_null(result, self.pos)))
code.put_gotref(self.py_result()) code.put_gotref(self.py_result())
def generate_assignment_code(self, rhs, code): def generate_assignment_code(self, rhs, code):
...@@ -4958,10 +4975,8 @@ class AttributeNode(ExprNode): ...@@ -4958,10 +4975,8 @@ class AttributeNode(ExprNode):
def generate_result_code(self, code): def generate_result_code(self, code):
if self.is_py_attr: if self.is_py_attr:
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
code.putln( code.putln(
'%s = __Pyx_PyObject_GetAttrStr(%s, %s); %s' % ( '%s = PyObject_GetAttr(%s, %s); %s' % (
self.result(), self.result(),
self.obj.py_result(), self.obj.py_result(),
code.intern_identifier(self.attribute), code.intern_identifier(self.attribute),
...@@ -10229,13 +10244,33 @@ class DocstringRefNode(ExprNode): ...@@ -10229,13 +10244,33 @@ class DocstringRefNode(ExprNode):
code.put_gotref(self.result()) code.put_gotref(self.result())
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
# #
# Runtime support code # Runtime support code
# #
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
get_name_interned_utility_code = UtilityCode.load("GetGlobalName", "ObjectHandling.c") get_name_interned_utility_code = UtilityCode(
proto = """
static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
""",
impl = """
static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
PyObject *result;
result = PyObject_GetAttr(dict, name);
if (!result) {
if (dict != %(BUILTINS)s) {
PyErr_Clear();
result = PyObject_GetAttr(%(BUILTINS)s, name);
}
if (!result) {
PyErr_SetObject(PyExc_NameError, name);
}
}
return result;
}
""" % {'BUILTINS' : Naming.builtins_cname})
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
......
...@@ -374,3 +374,47 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes( ...@@ -374,3 +374,47 @@ static CYTHON_INLINE PyObject* __Pyx_decode_bytes(
return PyUnicode_Decode(cstring, length, encoding, errors); return PyUnicode_Decode(cstring, length, encoding, errors);
} }
} }
/////////////// PyUnicode_Substring.proto ///////////////
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
PyObject* text, Py_ssize_t start, Py_ssize_t stop);
/////////////// PyUnicode_Substring ///////////////
/* Create a new unicode object holding the characters [start, stop) of `text`.
 * The macro itself performs no bounds checking or index normalisation.
 * Parameters are parenthesised so that expression arguments (e.g. `a + b`)
 * expand with the intended precedence. */
#if CYTHON_PEP393_ENABLED
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromKindAndData(PyUnicode_KIND(text), \
        PyUnicode_1BYTE_DATA(text) + (start) * PyUnicode_KIND(text), \
        (stop) - (start))
#else
#define __Pyx_PyUnicode_SUBSTRING(text, start, stop) \
    PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + (start), (stop) - (start))
#endif
/* Return text[start:stop] as a new unicode object, or NULL with an exception
 * set on failure.
 *
 * Negative indices are interpreted relative to the end of the string, a
 * negative start is clamped to 0 and a stop beyond the end is clamped to the
 * string length.  An empty or inverted range yields an empty string. */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_Substring(
            PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    /* The kind/data accessors used below require a "ready" string.
     * (Was PyUnicode_READY(self): `self` is not declared in this function.) */
    if (PyUnicode_READY(text) == -1)
        return NULL;
    length = PyUnicode_GET_LENGTH(text);
#else
    length = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += length;
        if (start < 0)
            start = 0;
    }
    if (stop < 0)
        stop += length;
    if (stop > length)
        stop = length;
    /* Covers start >= stop, including a start past the end of the string and
     * a stop that is still negative after adjustment. */
    length = stop - start;
    if (length <= 0)
        return PyUnicode_FromUnicode(NULL, 0);
    return (PyObject*)__Pyx_PyUnicode_SUBSTRING(text, start, stop);
}
\ No newline at end of file
...@@ -133,13 +133,13 @@ def bytes_decode(bytes s, start=None, stop=None): ...@@ -133,13 +133,13 @@ def bytes_decode(bytes s, start=None, stop=None):
AttributeError: 'NoneType' object has no attribute 'decode' AttributeError: 'NoneType' object has no attribute 'decode'
>>> print(bytes_decode(None, 1)) >>> print(bytes_decode(None, 1))
Traceback (most recent call last): Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode' TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode(None, None, 1)) >>> print(bytes_decode(None, None, 1))
Traceback (most recent call last): Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode' TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode(None, 0, 1)) >>> print(bytes_decode(None, 0, 1))
Traceback (most recent call last): Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'decode' TypeError: 'NoneType' object is not subscriptable
""" """
if start is None: if start is None:
if stop is None: if stop is None:
...@@ -173,13 +173,13 @@ def bytes_decode_unbound_method(bytes s, start=None, stop=None): ...@@ -173,13 +173,13 @@ def bytes_decode_unbound_method(bytes s, start=None, stop=None):
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType' TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType'
>>> print(bytes_decode_unbound_method(None, 1)) >>> print(bytes_decode_unbound_method(None, 1))
Traceback (most recent call last): Traceback (most recent call last):
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType' TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode_unbound_method(None, None, 1)) >>> print(bytes_decode_unbound_method(None, None, 1))
Traceback (most recent call last): Traceback (most recent call last):
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType' TypeError: 'NoneType' object is not subscriptable
>>> print(bytes_decode_unbound_method(None, 0, 1)) >>> print(bytes_decode_unbound_method(None, 0, 1))
Traceback (most recent call last): Traceback (most recent call last):
TypeError: descriptor 'decode' requires a 'bytes' object but received a 'NoneType' TypeError: 'NoneType' object is not subscriptable
""" """
if start is None: if start is None:
if stop is None: if stop is None:
......
__doc__ = u""" __doc__ = u"""
>>> do_slice(b'abcdef', 2, 3) >>> do_slice(b'abcdef', 2, 3)
(b'c', b'cdef', b'ab', b'abcdef') (b'c', b'cdef', b'ab', b'abcdef', b'cdef', b'ab', b'abcdef')
>>> do_slice(b'abcdef', 0, 5) >>> do_slice(b'abcdef', 0, 5)
(b'abcde', b'abcdef', b'', b'abcdef') (b'abcde', b'abcdef', b'', b'abcdef', b'abcdef', b'', b'abcdef')
""" """
import sys import sys
...@@ -12,5 +12,5 @@ if sys.version_info[0] < 3: ...@@ -12,5 +12,5 @@ if sys.version_info[0] < 3:
def do_slice(s, int i, int j): def do_slice(s, int i, int j):
cdef char* ss = s cdef char* ss = s
return ss[i:j], ss[i:], ss[:i], ss[:] return ss[i:j], ss[i:], ss[:i], ss[:], ss[i:None], ss[None:i], ss[None:None]
# coding: utf-8
__doc__ = u"""
>>> do_slice1(u'abcdef', 2, 3)
c
>>> do_slice2(u'abcdef', 2, 3)
cdef
>>> do_slice3(u'abcdef', 2, 3)
ab
>>> do_slice4(u'abcdef', 2, 3)
abcdef
>>> do_slice5(u'abcdef', 2, 3)
cdef
>>> do_slice6(u'abcdef', 2, 3)
ab
>>> do_slice7(u'abcdef', 2, 3)
abcdef
>>> do_slice1(u'abcdef', 2, 10)
cdef
>>> do_slice2(u'abcdef', 2, 10)
cdef
>>> do_slice3(u'abcdef', 2, 10)
ab
>>> do_slice4(u'abcdef', 2, 10)
abcdef
>>> do_slice1(u'abcdef', 0, 5)
abcde
>>> do_slice2(u'abcdef', 0, 5)
abcdef
>>> do_slice3(u'abcdef', 0, 5)
<BLANKLINE>
>>> do_slice4(u'abcdef', 0, 5)
abcdef
>>> do_slice5(u'abcdef', 0, 5)
abcdef
>>> do_slice6(u'abcdef', 0, 5)
<BLANKLINE>
>>> do_slice7(u'abcdef', 0, 5)
abcdef
>>> do_slice1(u'abcdef', -6, -1)
abcde
>>> do_slice2(u'abcdef', -6, -1)
abcdef
>>> do_slice3(u'abcdef', -6, -1)
<BLANKLINE>
>>> do_slice4(u'abcdef', -6, -1)
abcdef
>>> do_slice5(u'abcdef', -6, -1)
abcdef
>>> do_slice6(u'abcdef', -6, -1)
<BLANKLINE>
>>> do_slice7(u'abcdef', -6, -1)
abcdef
>>> do_slice1(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdД
>>> do_slice2(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdДeЕfФ
>>> do_slice3(u'aАbБcСdДeЕfФ', 2, 8)
aА
>>> do_slice4(u'aАbБcСdДeЕfФ', 2, 8)
aАbБcСdДeЕfФ
>>> do_slice5(u'aАbБcСdДeЕfФ', 2, 8)
bБcСdДeЕfФ
>>> do_slice6(u'aАbБcСdДeЕfФ', 2, 8)
aА
>>> do_slice7(u'aАbБcСdДeЕfФ', 2, 8)
aАbБcСdДeЕfФ
>>> do_slice1(u'АБСДЕФ', 2, 4)
СД
>>> do_slice2(u'АБСДЕФ', 2, 4)
СДЕФ
>>> do_slice3(u'АБСДЕФ', 2, 4)
АБ
>>> do_slice4(u'АБСДЕФ', 2, 4)
АБСДЕФ
>>> do_slice5(u'АБСДЕФ', 2, 4)
СДЕФ
>>> do_slice6(u'АБСДЕФ', 2, 4)
АБ
>>> do_slice7(u'АБСДЕФ', 2, 4)
АБСДЕФ
>>> do_slice1(u'АБСДЕФ', -4, -2)
СД
>>> do_slice2(u'АБСДЕФ', -4, -2)
СДЕФ
>>> do_slice3(u'АБСДЕФ', -4, -2)
АБ
>>> do_slice4(u'АБСДЕФ', -4, -2)
АБСДЕФ
>>> do_slice5(u'АБСДЕФ', -4, -2)
СДЕФ
>>> do_slice6(u'АБСДЕФ', -4, -2)
АБ
>>> do_slice7(u'АБСДЕФ', -4, -2)
АБСДЕФ
>>> do_slice1(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice2(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice3(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice4(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice5(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice6(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
>>> do_slice7(None, 2, 4)
Traceback (most recent call last):
TypeError: 'NoneType' object is not subscriptable
"""
import sys
# On Python 3, strip the u'' prefix from string literals in the doctests above
# (the text following "(" or a space), rewriting the module docstring in place.
if sys.version_info[0] >= 3:
    __doc__ = __doc__.replace(u"(u'", u"('").replace(u" u'", u" '")
# Slice a typed unicode argument with both bounds given as C ints: prints s[i:j].
def do_slice1(unicode s, int i, int j):
    print(s[i:j])
# Slice with only a start bound: prints s[i:].  `j` is accepted but unused.
def do_slice2(unicode s, int i, int j):
    print(s[i:])
# Slice with only a stop bound: prints s[:i].  `j` is accepted but unused.
def do_slice3(unicode s, int i, int j):
    print(s[:i])
# Full slice with no bounds: prints s[:].  `i` and `j` are accepted but unused.
def do_slice4(unicode s, int i, int j):
    print(s[:])
# Same range as s[i:], but with the stop bound spelled as an explicit None.
def do_slice5(unicode s, int i, int j):
    print(s[i:None])
# Same range as s[:i], but with the start bound spelled as an explicit None.
def do_slice6(unicode s, int i, int j):
    print(s[None:i])
# Same range as s[:], but with both bounds spelled as an explicit None.
def do_slice7(unicode s, int i, int j):
    print(s[None:None])
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment