Commit 5623ac87 authored by Hai Shi, committed by Xiang Zhang

bpo-37476: Adding tests for asutf8 and asutf8andsize (GH-14531)

parent eed5e9a9
......@@ -2819,6 +2819,34 @@ class CAPITest(unittest.TestCase):
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
# Test PyUnicode_AsUTF8()
@support.cpython_only
def test_asutf8(self):
    """Check the bytes produced by the _testcapi wrapper of PyUnicode_AsUTF8().

    Covers code points that encode to 2, 3 and 4 UTF-8 bytes, and verifies
    that a string containing lone surrogates raises UnicodeEncodeError.
    """
    from _testcapi import unicode_asutf8

    # (input string, expected UTF-8 bytes)
    expected_encodings = [
        ('\u0100', b'\xc4\x80'),
        ('\uffff', b'\xef\xbf\xbf'),
        (chr(0x10ffff), b'\xf4\x8f\xbf\xbf'),
    ]
    for text, encoded in expected_encodings:
        self.assertEqual(unicode_asutf8(text), encoded)

    # Lone surrogates cannot be encoded to UTF-8.
    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8('a\ud800b\udfffc')
# Test PyUnicode_AsUTF8AndSize()
@support.cpython_only
def test_asutf8andsize(self):
    """Check the _testcapi wrapper of PyUnicode_AsUTF8AndSize().

    The wrapper returns a (bytes, size) tuple; this verifies both members
    for code points that encode to 2, 3 and 4 UTF-8 bytes, and that a
    string containing lone surrogates raises UnicodeEncodeError.
    """
    from _testcapi import unicode_asutf8andsize

    # (input string, expected (UTF-8 bytes, reported size))
    expected_results = [
        ('\u0100', (b'\xc4\x80', 2)),
        ('\uffff', (b'\xef\xbf\xbf', 3)),
        (chr(0x10ffff), (b'\xf4\x8f\xbf\xbf', 4)),
    ]
    for text, encoded_and_size in expected_results:
        self.assertEqual(unicode_asutf8andsize(text), encoded_and_size)

    # Lone surrogates cannot be encoded to UTF-8.
    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8andsize('a\ud800b\udfffc')
# Test PyUnicode_FindChar()
@support.cpython_only
def test_findchar(self):
......
......@@ -1921,6 +1921,48 @@ unicode_asucs4(PyObject *self, PyObject *args)
return result;
}
/* Test wrapper for PyUnicode_AsUTF8().
 *
 * Expects a single str argument ("U" format).  Returns a new bytes object
 * built from the C string that PyUnicode_AsUTF8() hands back, or NULL (with
 * the exception left set by the failing call) if argument parsing or the
 * encoding fails.
 */
static PyObject *
unicode_asutf8(PyObject *self, PyObject *args)
{
    PyObject *str_obj;
    const char *utf8;

    if (!PyArg_ParseTuple(args, "U", &str_obj)) {
        return NULL;
    }

    utf8 = PyUnicode_AsUTF8(str_obj);
    /* Propagate the encoding error, otherwise copy the NUL-terminated
       buffer into a bytes object. */
    return (utf8 != NULL) ? PyBytes_FromString(utf8) : NULL;
}
/* Test wrapper for PyUnicode_AsUTF8AndSize().
 *
 * Expects a single str argument ("U" format).  Returns a tuple
 * (bytes, size) holding a copy of the UTF-8 buffer and the length reported
 * by PyUnicode_AsUTF8AndSize().  Returns NULL (with the exception left set
 * by the failing call) if argument parsing, encoding, or allocation fails.
 */
static PyObject *
unicode_asutf8andsize(PyObject *self, PyObject *args)
{
    PyObject *unicode, *result;
    const char *buffer;
    Py_ssize_t utf8_len;

    if (!PyArg_ParseTuple(args, "U", &unicode)) {
        return NULL;
    }

    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
    if (buffer == NULL) {
        return NULL;
    }

    /* Copy exactly utf8_len bytes.  PyBytes_FromString() would stop at the
       first NUL byte, so a str containing U+0000 would yield a bytes object
       shorter than the size returned next to it. */
    result = PyBytes_FromStringAndSize(buffer, utf8_len);
    if (result == NULL) {
        return NULL;
    }

    /* "N" passes our reference to result on to the new tuple. */
    return Py_BuildValue("(Nn)", result, utf8_len);
}
static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
......@@ -5174,6 +5216,8 @@ static PyMethodDef TestMethods[] = {
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment