Commit cc164232 authored by Serhiy Storchaka

Issue #28295: Fixed the documentation and added tests for PyUnicode_AsUCS4().

Original patch by Xiang Zhang.
parent 63b5b6fd
...@@ -641,7 +641,7 @@ APIs: ...@@ -641,7 +641,7 @@ APIs:
Copy the string *u* into a UCS4 buffer, including a null character, if Copy the string *u* into a UCS4 buffer, including a null character, if
*copy_null* is set. Returns *NULL* and sets an exception on error (in *copy_null* is set. Returns *NULL* and sets an exception on error (in
particular, a :exc:`ValueError` if *buflen* is smaller than the length of particular, a :exc:`SystemError` if *buflen* is smaller than the length of
*u*). *buffer* is returned on success. *u*). *buffer* is returned on success.
.. versionadded:: 3.3 .. versionadded:: 3.3
......
...@@ -749,7 +749,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( ...@@ -749,7 +749,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
#endif #endif
/* Copy the string into a UCS4 buffer including the null character if copy_null /* Copy the string into a UCS4 buffer including the null character if copy_null
is set. Return NULL and raise an exception on error. Raise a ValueError if is set. Return NULL and raise an exception on error. Raise a SystemError if
the buffer is smaller than the string. Return buffer on success. the buffer is smaller than the string. Return buffer on success.
buflen is the length of the buffer in (Py_UCS4) characters. */ buflen is the length of the buffer in (Py_UCS4) characters. */
......
...@@ -2687,6 +2687,23 @@ class CAPITest(unittest.TestCase): ...@@ -2687,6 +2687,23 @@ class CAPITest(unittest.TestCase):
self.assertEqual(size, nchar) self.assertEqual(size, nchar)
self.assertEqual(wchar, nonbmp + '\0') self.assertEqual(wchar, nonbmp + '\0')
# Test PyUnicode_AsUCS4()
@support.cpython_only
def test_asucs4(self):
    """Check PyUnicode_AsUCS4() via the _testcapi.unicode_asucs4 helper.

    The helper is called as unicode_asucs4(string, length, copy_null).
    It hands PyUnicode_AsUCS4() a zero-filled buffer of ``length + 1``
    characters whose last slot holds U+FFFF, and returns the whole
    buffer as a string, so the expected values below show exactly which
    slots were written.
    """
    from _testcapi import unicode_asucs4
    samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e')
    for text in samples:
        n = len(text)
        # (length argument, copy_null flag, expected buffer contents)
        fitting = (
            (n, 1, text + '\0'),
            (n, 0, text + '\uffff'),
            (n + 1, 1, text + '\0\uffff'),
            (n + 1, 0, text + '\0\uffff'),
        )
        for length, copy_null, expected in fitting:
            self.assertEqual(unicode_asucs4(text, length, copy_null),
                             expected)
        # The buffer is one character too short for what must be copied.
        for length, copy_null in ((n - 1, 1), (n - 2, 0)):
            self.assertRaises(SystemError, unicode_asucs4,
                              text, length, copy_null)
        # An embedded null character must not cut the copy short.
        doubled = text + '\0' + text
        self.assertEqual(unicode_asucs4(doubled, len(doubled), 1),
                         doubled + '\0')
        self.assertEqual(unicode_asucs4(doubled, len(doubled), 0),
                         doubled + '\uffff')
@support.cpython_only @support.cpython_only
def test_encode_decimal(self): def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal from _testcapi import unicode_encodedecimal
......
...@@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) ...@@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, size); return Py_BuildValue("(Nn)", result, size);
} }
/* Test helper for PyUnicode_AsUCS4().

   Python signature: unicode_asucs4(unicode, str_len, copy_null) -> str

   Allocates a scratch buffer of str_len + 1 Py_UCS4 elements, zero-fills it,
   stores the sentinel 0xFFFF in the last element and then calls
   PyUnicode_AsUCS4(unicode, buffer, str_len + 1, copy_null).  On success all
   str_len + 1 elements are returned as a new string, so the caller can see
   which elements were written and whether the sentinel survived.

   Returns NULL with an exception set when argument parsing fails, when
   str_len is out of range (ValueError), when the allocation fails
   (MemoryError) or when PyUnicode_AsUCS4() itself fails. */
static PyObject *
unicode_asucs4(PyObject *self, PyObject *args)
{
    PyObject *unicode, *result;
    Py_UCS4 *buffer;
    int copy_null;
    Py_ssize_t str_len, buf_len;

    if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) {
        return NULL;
    }

    /* str_len comes straight from the caller.  A negative value would put
       the sentinel write below at a negative index (str_len == -1 requests a
       zero-element buffer and then writes buffer[-1]), and PY_SSIZE_T_MAX
       would overflow the signed addition that computes buf_len. */
    if (str_len < 0 || str_len == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_ValueError,
                        "unicode_asucs4: str_len is out of range");
        return NULL;
    }

    buf_len = str_len + 1;
    buffer = PyMem_NEW(Py_UCS4, buf_len);
    if (buffer == NULL) {
        return PyErr_NoMemory();
    }
    memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
    /* Sentinel in the last element: it is only overwritten if
       PyUnicode_AsUCS4() writes that far. */
    buffer[str_len] = 0xffffU;

    if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
        PyMem_FREE(buffer);
        return NULL;
    }

    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
    PyMem_FREE(buffer);
    return result;
}
static PyObject * static PyObject *
unicode_encodedecimal(PyObject *self, PyObject *args) unicode_encodedecimal(PyObject *self, PyObject *args)
{ {
...@@ -3884,6 +3914,7 @@ static PyMethodDef TestMethods[] = { ...@@ -3884,6 +3914,7 @@ static PyMethodDef TestMethods[] = {
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment