Commit b6cd014d authored by Victor Stinner

Unicode: optimize creating of 1-character strings

parent bff7c968
...@@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) ...@@ -1919,8 +1919,18 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
return unicode_empty; return unicode_empty;
} }
assert(size > 0); assert(size > 0);
if (size == 1 && u[0] < 256) if (size == 1) {
return get_latin1_char((unsigned char)u[0]); Py_UCS4 ch = u[0];
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
max_char = ucs2lib_find_max_char(u, u + size); max_char = ucs2lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char); res = PyUnicode_New(size, max_char);
...@@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) ...@@ -1947,8 +1957,18 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
return unicode_empty; return unicode_empty;
} }
assert(size > 0); assert(size > 0);
if (size == 1 && u[0] < 256) if (size == 1) {
return get_latin1_char((unsigned char)u[0]); Py_UCS4 ch = u[0];
if (ch < 256)
return get_latin1_char((unsigned char)ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
}
max_char = ucs4lib_find_max_char(u, u + size); max_char = ucs4lib_find_max_char(u, u + size);
res = PyUnicode_New(size, max_char); res = PyUnicode_New(size, max_char);
...@@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args) ...@@ -11368,10 +11388,33 @@ unicode_find(PyObject *self, PyObject *args)
static PyObject * static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index) unicode_getitem(PyObject *self, Py_ssize_t index)
{ {
Py_UCS4 ch = PyUnicode_ReadChar(self, index); void *data;
if (ch == (Py_UCS4)-1) enum PyUnicode_Kind kind;
Py_UCS4 ch;
PyObject *res;
if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
PyErr_BadArgument();
return NULL;
}
if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
PyErr_SetString(PyExc_IndexError, "string index out of range");
return NULL; return NULL;
return PyUnicode_FromOrdinal(ch); }
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
ch = PyUnicode_READ(kind, data, index);
if (ch < 256)
return get_latin1_char(ch);
res = PyUnicode_New(1, ch);
if (res == NULL)
return NULL;
kind = PyUnicode_KIND(res);
data = PyUnicode_DATA(res);
PyUnicode_WRITE(kind, data, 0, ch);
assert(_PyUnicode_CheckConsistency(res, 1));
return res;
} }
/* Believe it or not, this produces the same value for ASCII strings /* Believe it or not, this produces the same value for ASCII strings
...@@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) ...@@ -12039,7 +12082,6 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
} }
if (PyUnicode_IS_ASCII(self)) { if (PyUnicode_IS_ASCII(self)) {
kind = PyUnicode_KIND(self);
data = PyUnicode_1BYTE_DATA(self); data = PyUnicode_1BYTE_DATA(self);
return unicode_fromascii(data + start, length); return unicode_fromascii(data + start, length);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment