Commit b211068f authored by Xiang Zhang

Issue #28822: Adjust indices handling of PyUnicode_FindChar().

parent 38f225dd
...@@ -1625,6 +1625,9 @@ They all return *NULL* or ``-1`` if an exception occurs. ...@@ -1625,6 +1625,9 @@ They all return *NULL* or ``-1`` if an exception occurs.
.. versionadded:: 3.3 .. versionadded:: 3.3
.. versionchanged:: 3.7
*start* and *end* are now adjusted to behave like ``str[start:end]``.
.. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \ .. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
......
...@@ -2728,6 +2728,29 @@ class CAPITest(unittest.TestCase): ...@@ -2728,6 +2728,29 @@ class CAPITest(unittest.TestCase):
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
# Test PyUnicode_FindChar()
@support.cpython_only
def test_findchar(self):
    """Exercise PyUnicode_FindChar() through the _testcapi wrapper."""
    from _testcapi import unicode_findchar

    # Every character must be found at its own index, searching in
    # either direction over the whole string.
    samples = ("\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1")
    for text in samples:
        for pos, char in enumerate(text):
            for direction in (1, -1):
                self.assertEqual(
                    unicode_findchar(text, ord(char), 0, len(text), direction),
                    pos)

    text = "!>_<!"
    size = len(text)
    bang = ord('!')

    # 0x110000 is not present in the string, so the search reports -1.
    for direction in (1, -1):
        self.assertEqual(
            unicode_findchar(text, 0x110000, 0, size, direction), -1)

    # start < end (end lies past the end of the string)
    for direction in (1, -1):
        self.assertEqual(
            unicode_findchar(text, bang, 1, size + 1, direction), 4)

    # start >= end
    self.assertEqual(unicode_findchar(text, bang, 0, 0, 1), -1)
    self.assertEqual(unicode_findchar(text, bang, size, 0, 1), -1)

    # negative
    for direction in (1, -1):
        self.assertEqual(
            unicode_findchar(text, bang, -size, -1, direction), 0)
# Test PyUnicode_CopyCharacters() # Test PyUnicode_CopyCharacters()
@support.cpython_only @support.cpython_only
def test_copycharacters(self): def test_copycharacters(self):
......
...@@ -545,6 +545,9 @@ Windows ...@@ -545,6 +545,9 @@ Windows
C API C API
----- -----
- Issue #28822: The index parameters *start* and *end* of PyUnicode_FindChar()
are now adjusted to behave like ``str[start:end]``.
- Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions. - Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
- Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef, - Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef,
......
...@@ -1887,6 +1887,27 @@ unicode_asucs4(PyObject *self, PyObject *args) ...@@ -1887,6 +1887,27 @@ unicode_asucs4(PyObject *self, PyObject *args)
return result; return result;
} }
/* Test wrapper around PyUnicode_FindChar().
 *
 * Expects the argument tuple (str, ch, start, end, direction), parsed with
 * the format "UInni", and forwards the values unchanged to
 * PyUnicode_FindChar().
 *
 * Returns the result of PyUnicode_FindChar() as a Python int, or NULL when
 * argument parsing fails or PyUnicode_FindChar() returns -2.
 */
static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
    PyObject *str;
    unsigned int ch;
    Py_ssize_t start, end;
    int direction;
    Py_ssize_t index;

    if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
                          &start, &end, &direction)) {
        return NULL;
    }

    index = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
    if (index != -2) {
        /* Includes -1, which is passed through to the caller as-is. */
        return PyLong_FromSsize_t(index);
    }

    /* -2 is the error return of PyUnicode_FindChar(). */
    return NULL;
}
static PyObject * static PyObject *
unicode_copycharacters(PyObject *self, PyObject *args) unicode_copycharacters(PyObject *self, PyObject *args)
{ {
...@@ -4121,6 +4142,7 @@ static PyMethodDef TestMethods[] = { ...@@ -4121,6 +4142,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
......
...@@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, ...@@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
int direction) int direction)
{ {
int kind; int kind;
Py_ssize_t result; Py_ssize_t len, result;
if (PyUnicode_READY(str) == -1) if (PyUnicode_READY(str) == -1)
return -2; return -2;
if (start < 0 || end < 0) { len = PyUnicode_GET_LENGTH(str);
PyErr_SetString(PyExc_IndexError, "string index out of range"); ADJUST_INDICES(start, end, len);
return -2; if (end - start < 1)
}
if (end > PyUnicode_GET_LENGTH(str))
end = PyUnicode_GET_LENGTH(str);
if (start >= end)
return -1; return -1;
kind = PyUnicode_KIND(str); kind = PyUnicode_KIND(str);
result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment