Issue #28295: Fixed the documentation and added tests for PyUnicode_AsUCS4().

Original patch by Xiang Zhang.

Issue #28295: Fixed the documentation and added tests for PyUnicode_AsUCS4().
Original patch by Xiang Zhang.
b3648576 · Serhiy Storchaka · 1edebef7 · cc164232 · b3648576 · b3648576
Commit b3648576 authored Oct 02, 2016 by Serhiy Storchaka
4 changed files
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -635,7 +635,7 @@ APIs:

   Copy the string *u* into a UCS4 buffer, including a null character, if
   *copy_null* is set.  Returns *NULL* and sets an exception on error (in
-   particular, a :exc:`ValueError` if *buflen* is smaller than the length of
+   particular, a :exc:`SystemError` if *buflen* is smaller than the length of
   *u*).  *buffer* is returned on success.

   .. versionadded:: 3.3

--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -733,7 +733,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
 #endif

 /* Copy the string into a UCS4 buffer including the null character if copy_null
-   is set. Return NULL and raise an exception on error. Raise a ValueError if
+   is set. Return NULL and raise an exception on error. Raise a SystemError if
   the buffer is smaller than the string. Return buffer on success.

   buflen is the length of the buffer in (Py_UCS4) characters. */

--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2711,6 +2711,23 @@ class CAPITest(unittest.TestCase):
        self.assertEqual(size, nchar)
        self.assertEqual(wchar, nonbmp + '\0')

+    # Test PyUnicode_AsUCS4()
+    @support.cpython_only
+    def test_asucs4(self):
+        from _testcapi import unicode_asucs4
+        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
+                  'a\ud800b\udfffc', '\ud834\udd1e']:
+            l = len(s)
+            self.assertEqual(unicode_asucs4(s, l, 1), s+'\0')
+            self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff')
+            self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff')
+            self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff')
+            self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1)
+            self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0)
+            s = '\0'.join([s, s])
+            self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
+            self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
+
    @support.cpython_only
    def test_encode_decimal(self):
        from _testcapi import unicode_encodedecimal

--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1828,6 +1828,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
    return Py_BuildValue("(Nn)", result, size);
 }

+/* Test helper for PyUnicode_AsUCS4().
+
+   Arguments: (unicode, str_len, copy_null).
+
+   Allocates a buffer of str_len + 1 Py_UCS4 characters, zero-fills it and
+   stores the sentinel 0xffff in its last element (index str_len), then
+   calls PyUnicode_AsUCS4() on that buffer.  The whole buffer is returned
+   as a str of length str_len + 1, so the caller can see exactly which
+   elements were written.
+
+   Returns NULL with an exception set if the arguments are invalid, the
+   allocation fails, or PyUnicode_AsUCS4() fails. */
+static PyObject *
+unicode_asucs4(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    Py_UCS4 *buffer;
+    int copy_null;
+    Py_ssize_t str_len, buf_len;
+
+    if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) {
+        return NULL;
+    }
+
+    /* A negative str_len would make buffer[str_len] below write outside
+       the allocation, and PY_SSIZE_T_MAX would overflow str_len + 1. */
+    if (str_len < 0 || str_len == PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_ValueError,
+                        "str_len must be in range [0, PY_SSIZE_T_MAX)");
+        return NULL;
+    }
+
+    /* One extra element holds the sentinel (or the copied null). */
+    buf_len = str_len + 1;
+    buffer = PyMem_NEW(Py_UCS4, buf_len);
+    if (buffer == NULL) {
+        return PyErr_NoMemory();
+    }
+    memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
+    /* Sentinel: stays 0xffff unless PyUnicode_AsUCS4() overwrites it. */
+    buffer[str_len] = 0xffffU;
+
+    if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
+        PyMem_FREE(buffer);
+        return NULL;
+    }
+
+    /* PyUnicode_FromKindAndData() copies the data, so the buffer can be
+       released whether or not the conversion succeeded. */
+    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
+    PyMem_FREE(buffer);
+    return result;
+}
+
 static PyObject *
 unicode_encodedecimal(PyObject *self, PyObject *args)
 {
@@ -4030,6 +4060,7 @@ static PyMethodDef TestMethods[] = {
    {"test_widechar",           (PyCFunction)test_widechar,      METH_NOARGS},
    {"unicode_aswidechar",      unicode_aswidechar,              METH_VARARGS},
    {"unicode_aswidecharstring",unicode_aswidecharstring,        METH_VARARGS},
+    {"unicode_asucs4",          unicode_asucs4,                  METH_VARARGS},
    {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},
    {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
    {"unicode_legacy_string",   unicode_legacy_string,           METH_VARARGS},