Commit dee9ab01 authored by Tim Peters

On c.l.py, Martin v. Löwis said that Py_UNICODE could be of a signed type,

so fiddle Jeremy's fix to live with that.  Also added more comments.

Bugfix candidate (this bug is in all versions of Python, at least since
2.1).
parent 59ff4abf
/* /*
Unicode implementation based on original code by Fredrik Lundh, Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
...@@ -53,7 +53,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ...@@ -53,7 +53,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
The implementation will keep allocated Unicode memory intact for The implementation will keep allocated Unicode memory intact for
all objects on the free list having a size less than this all objects on the free list having a size less than this
limit. This reduces malloc() overhead for small Unicode objects. limit. This reduces malloc() overhead for small Unicode objects.
At worst this will result in MAX_UNICODE_FREELIST_SIZE * At worst this will result in MAX_UNICODE_FREELIST_SIZE *
(sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
...@@ -98,7 +98,7 @@ static PyUnicodeObject *unicode_latin1[256]; ...@@ -98,7 +98,7 @@ static PyUnicodeObject *unicode_latin1[256];
parameter; it is initialized by _PyUnicode_Init(). parameter; it is initialized by _PyUnicode_Init().
Always use the PyUnicode_SetDefaultEncoding() and Always use the PyUnicode_SetDefaultEncoding() and
PyUnicode_GetDefaultEncoding() APIs to access this global. PyUnicode_GetDefaultEncoding() APIs to access this global.
*/ */
static char unicode_default_encoding[100]; static char unicode_default_encoding[100];
...@@ -122,7 +122,7 @@ int unicode_resize(register PyUnicodeObject *unicode, ...@@ -122,7 +122,7 @@ int unicode_resize(register PyUnicodeObject *unicode,
int length) int length)
{ {
void *oldstr; void *oldstr;
/* Shortcut if there's nothing much to do. */ /* Shortcut if there's nothing much to do. */
if (unicode->length == length) if (unicode->length == length)
goto reset; goto reset;
...@@ -130,10 +130,14 @@ int unicode_resize(register PyUnicodeObject *unicode, ...@@ -130,10 +130,14 @@ int unicode_resize(register PyUnicodeObject *unicode,
/* Resizing shared object (unicode_empty or single character /* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize() objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */ instead ! */
if (unicode == unicode_empty || if (unicode == unicode_empty ||
(unicode->length == 1 && (unicode->length == 1 &&
/* XXX Is unicode->str[] always unsigned? */ /* MvL said unicode->str[] may be signed. Python generally assumes
unicode->str[0] < 256U && * an int contains at least 32 bits, and we don't use more than
* 32 bits even in a UCS4 build, so casting to unsigned int should
* be correct.
*/
(unsigned int)unicode->str[0] < 256U &&
unicode_latin1[unicode->str[0]] == unicode)) { unicode_latin1[unicode->str[0]] == unicode)) {
PyErr_SetString(PyExc_SystemError, PyErr_SetString(PyExc_SystemError,
"can't resize shared unicode objects"); "can't resize shared unicode objects");
...@@ -159,12 +163,12 @@ int unicode_resize(register PyUnicodeObject *unicode, ...@@ -159,12 +163,12 @@ int unicode_resize(register PyUnicodeObject *unicode,
unicode->defenc = NULL; unicode->defenc = NULL;
} }
unicode->hash = -1; unicode->hash = -1;
return 0; return 0;
} }
/* We allocate one more byte to make sure the string is /* We allocate one more byte to make sure the string is
Ux0000 terminated -- XXX is this needed ? Ux0000 terminated -- XXX is this needed ?
XXX This allocator could further be enhanced by assuring that the XXX This allocator could further be enhanced by assuring that the
free list never reduces its size below 1. free list never reduces its size below 1.
...@@ -176,7 +180,7 @@ PyUnicodeObject *_PyUnicode_New(int length) ...@@ -176,7 +180,7 @@ PyUnicodeObject *_PyUnicode_New(int length)
{ {
register PyUnicodeObject *unicode; register PyUnicodeObject *unicode;
/* Optimization for empty strings */ /* Optimization for empty strings */
if (length == 0 && unicode_empty != NULL) { if (length == 0 && unicode_empty != NULL) {
Py_INCREF(unicode_empty); Py_INCREF(unicode_empty);
return unicode_empty; return unicode_empty;
...@@ -190,7 +194,7 @@ PyUnicodeObject *_PyUnicode_New(int length) ...@@ -190,7 +194,7 @@ PyUnicodeObject *_PyUnicode_New(int length)
if (unicode->str) { if (unicode->str) {
/* Keep-Alive optimization: we only upsize the buffer, /* Keep-Alive optimization: we only upsize the buffer,
never downsize it. */ never downsize it. */
if ((unicode->length < length) && if ((unicode->length < length) &&
unicode_resize(unicode, length) < 0) { unicode_resize(unicode, length) < 0) {
PyMem_DEL(unicode->str); PyMem_DEL(unicode->str);
goto onError; goto onError;
...@@ -213,8 +217,12 @@ PyUnicodeObject *_PyUnicode_New(int length) ...@@ -213,8 +217,12 @@ PyUnicodeObject *_PyUnicode_New(int length)
goto onError; goto onError;
} }
/* Initialize the first element to guard against cases where /* Initialize the first element to guard against cases where
the caller fails before initializing str. * the caller fails before initializing str -- unicode_resize()
*/ * reads str[0], and the Keep-Alive optimization can keep memory
* allocated for str alive across a call to unicode_dealloc(unicode).
* We don't want unicode_resize to read uninitialized memory in
* that case.
*/
unicode->str[0] = 0; unicode->str[0] = 0;
unicode->str[length] = 0; unicode->str[length] = 0;
unicode->length = length; unicode->length = length;
...@@ -273,7 +281,7 @@ int PyUnicode_Resize(PyObject **unicode, int length) ...@@ -273,7 +281,7 @@ int PyUnicode_Resize(PyObject **unicode, int length)
/* Resizing unicode_empty and single character objects is not /* Resizing unicode_empty and single character objects is not
possible since these are being shared. We simply return a fresh possible since these are being shared. We simply return a fresh
copy with the same Unicode content. */ copy with the same Unicode content. */
if (v->length != length && if (v->length != length &&
(v == unicode_empty || v->length == 1)) { (v == unicode_empty || v->length == 1)) {
PyUnicodeObject *w = _PyUnicode_New(length); PyUnicodeObject *w = _PyUnicode_New(length);
if (w == NULL) if (w == NULL)
...@@ -324,7 +332,7 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, ...@@ -324,7 +332,7 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
return (PyObject *)unicode; return (PyObject *)unicode;
} }
} }
unicode = _PyUnicode_New(size); unicode = _PyUnicode_New(size);
if (!unicode) if (!unicode)
return NULL; return NULL;
...@@ -355,7 +363,7 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w, ...@@ -355,7 +363,7 @@ PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
/* Copy the wchar_t data into the new object */ /* Copy the wchar_t data into the new object */
#ifdef HAVE_USABLE_WCHAR_T #ifdef HAVE_USABLE_WCHAR_T
memcpy(unicode->str, w, size * sizeof(wchar_t)); memcpy(unicode->str, w, size * sizeof(wchar_t));
#else #else
{ {
register Py_UNICODE *u; register Py_UNICODE *u;
register int i; register int i;
...@@ -458,7 +466,7 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -458,7 +466,7 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
const char *s = NULL; const char *s = NULL;
int len; int len;
PyObject *v; PyObject *v;
if (obj == NULL) { if (obj == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return NULL;
...@@ -505,13 +513,13 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, ...@@ -505,13 +513,13 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
obj->ob_type->tp_name); obj->ob_type->tp_name);
goto onError; goto onError;
} }
/* Convert to Unicode */ /* Convert to Unicode */
if (len == 0) { if (len == 0) {
Py_INCREF(unicode_empty); Py_INCREF(unicode_empty);
v = (PyObject *)unicode_empty; v = (PyObject *)unicode_empty;
} }
else else
v = PyUnicode_Decode(s, len, encoding, errors); v = PyUnicode_Decode(s, len, encoding, errors);
return v; return v;
...@@ -558,7 +566,7 @@ PyObject *PyUnicode_Decode(const char *s, ...@@ -558,7 +566,7 @@ PyObject *PyUnicode_Decode(const char *s,
} }
Py_DECREF(buffer); Py_DECREF(buffer);
return unicode; return unicode;
onError: onError:
Py_XDECREF(buffer); Py_XDECREF(buffer);
return NULL; return NULL;
...@@ -570,7 +578,7 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s, ...@@ -570,7 +578,7 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s,
const char *errors) const char *errors)
{ {
PyObject *v, *unicode; PyObject *v, *unicode;
unicode = PyUnicode_FromUnicode(s, size); unicode = PyUnicode_FromUnicode(s, size);
if (unicode == NULL) if (unicode == NULL)
return NULL; return NULL;
...@@ -584,13 +592,13 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, ...@@ -584,13 +592,13 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
const char *errors) const char *errors)
{ {
PyObject *v; PyObject *v;
if (!PyUnicode_Check(unicode)) { if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument(); PyErr_BadArgument();
goto onError; goto onError;
} }
if (encoding == NULL) if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding(); encoding = PyUnicode_GetDefaultEncoding();
/* Shortcuts for common default encodings */ /* Shortcuts for common default encodings */
...@@ -620,7 +628,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, ...@@ -620,7 +628,7 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
goto onError; goto onError;
} }
return v; return v;
onError: onError:
return NULL; return NULL;
} }
...@@ -670,7 +678,7 @@ const char *PyUnicode_GetDefaultEncoding(void) ...@@ -670,7 +678,7 @@ const char *PyUnicode_GetDefaultEncoding(void)
int PyUnicode_SetDefaultEncoding(const char *encoding) int PyUnicode_SetDefaultEncoding(const char *encoding)
{ {
PyObject *v; PyObject *v;
/* Make sure the encoding is valid. As a side effect, this also /* Make sure the encoding is valid. As a side effect, this also
loads the encoding into the codec registry cache. */ loads the encoding into the codec registry cache. */
v = _PyCodec_Lookup(encoding); v = _PyCodec_Lookup(encoding);
...@@ -678,7 +686,7 @@ int PyUnicode_SetDefaultEncoding(const char *encoding) ...@@ -678,7 +686,7 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
goto onError; goto onError;
Py_DECREF(v); Py_DECREF(v);
strncpy(unicode_default_encoding, strncpy(unicode_default_encoding,
encoding, encoding,
sizeof(unicode_default_encoding)); sizeof(unicode_default_encoding));
return 0; return 0;
...@@ -778,7 +786,7 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler ...@@ -778,7 +786,7 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
/* see RFC2152 for details */ /* see RFC2152 for details */
static static
char utf7_special[128] = { char utf7_special[128] = {
/* indicate whether a UTF-7 character is special i.e. cannot be directly /* indicate whether a UTF-7 character is special i.e. cannot be directly
encoded: encoded:
...@@ -869,7 +877,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -869,7 +877,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
if ((ch == '-') || !B64CHAR(ch)) { if ((ch == '-') || !B64CHAR(ch)) {
inShift = 0; inShift = 0;
s++; s++;
/* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate); /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
if (bitsleft >= 6) { if (bitsleft >= 6) {
/* The shift sequence has a partial character in it. If /* The shift sequence has a partial character in it. If
...@@ -877,26 +885,26 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -877,26 +885,26 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
but that is not the case here */ but that is not the case here */
errmsg = "partial character in shift sequence"; errmsg = "partial character in shift sequence";
goto utf7Error; goto utf7Error;
} }
/* According to RFC2152 the remaining bits should be zero. We /* According to RFC2152 the remaining bits should be zero. We
choose to signal an error/insert a replacement character choose to signal an error/insert a replacement character
here to indicate the potential of a misencoded character. */ here to indicate the potential of a misencoded character. */
/* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */ /* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */
if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) { if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) {
errmsg = "non-zero padding bits in shift sequence"; errmsg = "non-zero padding bits in shift sequence";
goto utf7Error; goto utf7Error;
} }
if (ch == '-') { if (ch == '-') {
if ((s < e) && (*(s) == '-')) { if ((s < e) && (*(s) == '-')) {
*p++ = '-'; *p++ = '-';
inShift = 1; inShift = 1;
} }
} else if (SPECIAL(ch,0,0)) { } else if (SPECIAL(ch,0,0)) {
errmsg = "unexpected special character"; errmsg = "unexpected special character";
goto utf7Error; goto utf7Error;
} else { } else {
*p++ = ch; *p++ = ch;
} }
...@@ -922,7 +930,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -922,7 +930,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
else if (SPECIAL(ch,0,0)) { else if (SPECIAL(ch,0,0)) {
errmsg = "unexpected special character"; errmsg = "unexpected special character";
s++; s++;
goto utf7Error; goto utf7Error;
} }
else { else {
*p++ = ch; *p++ = ch;
...@@ -1026,7 +1034,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, ...@@ -1026,7 +1034,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
/* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft); /* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft);
/* If the next character is special then we don't need to terminate /* If the next character is special then we don't need to terminate
the shift sequence. If the next character is not a BASE64 character the shift sequence. If the next character is not a BASE64 character
or '-' then the shift sequence will be terminated implicitly and we or '-' then the shift sequence will be terminated implicitly and we
don't have to insert a '-'. */ don't have to insert a '-'. */
...@@ -1035,7 +1043,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, ...@@ -1035,7 +1043,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
Py_UNICODE ch2 = s[i+1]; Py_UNICODE ch2 = s[i+1];
if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) { if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) {
} else if (B64CHAR(ch2) || ch2 == '-') { } else if (B64CHAR(ch2) || ch2 == '-') {
*out++ = '-'; *out++ = '-';
inShift = 0; inShift = 0;
...@@ -1049,7 +1057,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, ...@@ -1049,7 +1057,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
inShift = 0; inShift = 0;
} }
} }
} }
} }
} }
if (bitsleft) { if (bitsleft) {
...@@ -1070,7 +1078,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, ...@@ -1070,7 +1078,7 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
/* --- UTF-8 Codec -------------------------------------------------------- */ /* --- UTF-8 Codec -------------------------------------------------------- */
static static
char utf8_code_length[256] = { char utf8_code_length[256] = {
/* Map UTF-8 encoded prefix byte to sequence length. zero means /* Map UTF-8 encoded prefix byte to sequence length. zero means
illegal prefix. see RFC 2279 for details */ illegal prefix. see RFC 2279 for details */
...@@ -1171,7 +1179,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, ...@@ -1171,7 +1179,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
break; break;
case 3: case 3:
if ((s[1] & 0xc0) != 0x80 || if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80) { (s[2] & 0xc0) != 0x80) {
errmsg = "invalid data"; errmsg = "invalid data";
startinpos = s-starts; startinpos = s-starts;
...@@ -1181,7 +1189,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, ...@@ -1181,7 +1189,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
if (ch < 0x0800) { if (ch < 0x0800) {
/* Note: UTF-8 encodings of surrogates are considered /* Note: UTF-8 encodings of surrogates are considered
legal UTF-8 sequences; legal UTF-8 sequences;
XXX For wide builds (UCS-4) we should probably try XXX For wide builds (UCS-4) we should probably try
to recombine the surrogates into a single code to recombine the surrogates into a single code
...@@ -1222,13 +1230,13 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, ...@@ -1222,13 +1230,13 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
*p++ = (Py_UNICODE)ch; *p++ = (Py_UNICODE)ch;
#else #else
/* compute and append the two surrogates: */ /* compute and append the two surrogates: */
/* translate from 10000..10FFFF to 0..FFFF */ /* translate from 10000..10FFFF to 0..FFFF */
ch -= 0x10000; ch -= 0x10000;
/* high surrogate = top 10 bits added to D800 */ /* high surrogate = top 10 bits added to D800 */
*p++ = (Py_UNICODE)(0xD800 + (ch >> 10)); *p++ = (Py_UNICODE)(0xD800 + (ch >> 10));
/* low surrogate = bottom 10 bits added to DC00 */ /* low surrogate = bottom 10 bits added to DC00 */
*p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF)); *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
#endif #endif
...@@ -1243,7 +1251,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, ...@@ -1243,7 +1251,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
} }
s += n; s += n;
continue; continue;
utf8Error: utf8Error:
outpos = p-PyUnicode_AS_UNICODE(unicode); outpos = p-PyUnicode_AS_UNICODE(unicode);
if (unicode_decode_call_errorhandler( if (unicode_decode_call_errorhandler(
...@@ -1437,7 +1445,7 @@ PyUnicode_DecodeUTF16(const char *s, ...@@ -1437,7 +1445,7 @@ PyUnicode_DecodeUTF16(const char *s,
q += 2; q += 2;
bo = 1; bo = 1;
} }
#else #else
if (bom == 0xFEFF) { if (bom == 0xFEFF) {
q += 2; q += 2;
bo = 1; bo = 1;
...@@ -1566,7 +1574,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s, ...@@ -1566,7 +1574,7 @@ PyUnicode_EncodeUTF16(const Py_UNICODE *s,
for (i = pairs = 0; i < size; i++) for (i = pairs = 0; i < size; i++)
if (s[i] >= 0x10000) if (s[i] >= 0x10000)
pairs++; pairs++;
v = PyString_FromStringAndSize(NULL, v = PyString_FromStringAndSize(NULL,
2 * (size + pairs + (byteorder == 0))); 2 * (size + pairs + (byteorder == 0)));
if (v == NULL) if (v == NULL)
return NULL; return NULL;
...@@ -1887,25 +1895,25 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, ...@@ -1887,25 +1895,25 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
if (quotes) { if (quotes) {
*p++ = 'u'; *p++ = 'u';
*p++ = (findchar(s, size, '\'') && *p++ = (findchar(s, size, '\'') &&
!findchar(s, size, '"')) ? '"' : '\''; !findchar(s, size, '"')) ? '"' : '\'';
} }
while (size-- > 0) { while (size-- > 0) {
Py_UNICODE ch = *s++; Py_UNICODE ch = *s++;
/* Escape quotes */ /* Escape quotes */
if (quotes && if (quotes &&
(ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) { (ch == (Py_UNICODE) PyString_AS_STRING(repr)[1] || ch == '\\')) {
*p++ = '\\'; *p++ = '\\';
*p++ = (char) ch; *p++ = (char) ch;
continue; continue;
} }
#ifdef Py_UNICODE_WIDE #ifdef Py_UNICODE_WIDE
/* Map 21-bit characters to '\U00xxxxxx' */ /* Map 21-bit characters to '\U00xxxxxx' */
else if (ch >= 0x10000) { else if (ch >= 0x10000) {
int offset = p - PyString_AS_STRING(repr); int offset = p - PyString_AS_STRING(repr);
/* Resize the string if necessary */ /* Resize the string if necessary */
if (offset + 12 > PyString_GET_SIZE(repr)) { if (offset + 12 > PyString_GET_SIZE(repr)) {
if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100)) if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
...@@ -1930,7 +1938,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, ...@@ -1930,7 +1938,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
else if (ch >= 0xD800 && ch < 0xDC00) { else if (ch >= 0xD800 && ch < 0xDC00) {
Py_UNICODE ch2; Py_UNICODE ch2;
Py_UCS4 ucs; Py_UCS4 ucs;
ch2 = *s++; ch2 = *s++;
size--; size--;
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
...@@ -1982,7 +1990,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s, ...@@ -1982,7 +1990,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = 'x'; *p++ = 'x';
*p++ = hexdigit[(ch >> 4) & 0x000F]; *p++ = hexdigit[(ch >> 4) & 0x000F];
*p++ = hexdigit[ch & 0x000F]; *p++ = hexdigit[ch & 0x000F];
} }
/* Copy everything else as-is */ /* Copy everything else as-is */
else else
...@@ -2028,7 +2036,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -2028,7 +2036,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
const char *bs; const char *bs;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
/* Escaped strings will always be longer than the resulting /* Escaped strings will always be longer than the resulting
Unicode string, so we start with size here and then reduce the Unicode string, so we start with size here and then reduce the
length after conversion to the true value. (But decoding error length after conversion to the true value. (But decoding error
...@@ -2111,7 +2119,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -2111,7 +2119,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
return (PyObject *)v; return (PyObject *)v;
onError: onError:
Py_XDECREF(v); Py_XDECREF(v);
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
...@@ -2154,7 +2162,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, ...@@ -2154,7 +2162,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
*p++ = hexdigit[(ch >> 8) & 0xf]; *p++ = hexdigit[(ch >> 8) & 0xf];
*p++ = hexdigit[(ch >> 4) & 0xf]; *p++ = hexdigit[(ch >> 4) & 0xf];
*p++ = hexdigit[ch & 15]; *p++ = hexdigit[ch & 15];
} }
else else
#endif #endif
/* Map 16-bit characters to '\uxxxx' */ /* Map 16-bit characters to '\uxxxx' */
...@@ -2193,7 +2201,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s, ...@@ -2193,7 +2201,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
Py_UNICODE *p; Py_UNICODE *p;
/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
if (size == 1 && *(unsigned char*)s < 256) { if (size == 1 && *(unsigned char*)s < 256) {
Py_UNICODE r = *(unsigned char*)s; Py_UNICODE r = *(unsigned char*)s;
...@@ -2209,7 +2217,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s, ...@@ -2209,7 +2217,7 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
while (size-- > 0) while (size-- > 0)
*p++ = (unsigned char)*s++; *p++ = (unsigned char)*s++;
return (PyObject *)v; return (PyObject *)v;
onError: onError:
Py_XDECREF(v); Py_XDECREF(v);
return NULL; return NULL;
...@@ -2509,13 +2517,13 @@ PyObject *PyUnicode_DecodeASCII(const char *s, ...@@ -2509,13 +2517,13 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
const char *e; const char *e;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
/* ASCII is equivalent to the first 128 ordinals in Unicode. */ /* ASCII is equivalent to the first 128 ordinals in Unicode. */
if (size == 1 && *(unsigned char*)s < 128) { if (size == 1 && *(unsigned char*)s < 128) {
Py_UNICODE r = *(unsigned char*)s; Py_UNICODE r = *(unsigned char*)s;
return PyUnicode_FromUnicode(&r, 1); return PyUnicode_FromUnicode(&r, 1);
} }
v = _PyUnicode_New(size); v = _PyUnicode_New(size);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
...@@ -2547,7 +2555,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s, ...@@ -2547,7 +2555,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
return (PyObject *)v; return (PyObject *)v;
onError: onError:
Py_XDECREF(v); Py_XDECREF(v);
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
...@@ -2665,7 +2673,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, ...@@ -2665,7 +2673,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
int extrachars = 0; int extrachars = 0;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
/* Default to Latin-1 */ /* Default to Latin-1 */
if (mapping == NULL) if (mapping == NULL)
return PyUnicode_DecodeLatin1(s, size, errors); return PyUnicode_DecodeLatin1(s, size, errors);
...@@ -2738,7 +2746,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, ...@@ -2738,7 +2746,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
int needed = (targetsize - extrachars) + \ int needed = (targetsize - extrachars) + \
(targetsize << 2); (targetsize << 2);
extrachars += needed; extrachars += needed;
if (_PyUnicode_Resize(&v, if (_PyUnicode_Resize(&v,
PyUnicode_GET_SIZE(v) + needed) < 0) { PyUnicode_GET_SIZE(v) + needed) < 0) {
Py_DECREF(x); Py_DECREF(x);
goto onError; goto onError;
...@@ -2769,7 +2777,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, ...@@ -2769,7 +2777,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
return (PyObject *)v; return (PyObject *)v;
onError: onError:
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
...@@ -3421,7 +3429,7 @@ PyObject *PyUnicode_Translate(PyObject *str, ...@@ -3421,7 +3429,7 @@ PyObject *PyUnicode_Translate(PyObject *str,
const char *errors) const char *errors)
{ {
PyObject *result; PyObject *result;
str = PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (str == NULL) if (str == NULL)
goto onError; goto onError;
...@@ -3431,12 +3439,12 @@ PyObject *PyUnicode_Translate(PyObject *str, ...@@ -3431,12 +3439,12 @@ PyObject *PyUnicode_Translate(PyObject *str,
errors); errors);
Py_DECREF(str); Py_DECREF(str);
return result; return result;
onError: onError:
Py_XDECREF(str); Py_XDECREF(str);
return NULL; return NULL;
} }
/* --- Decimal Encoder ---------------------------------------------------- */ /* --- Decimal Encoder ---------------------------------------------------- */
int PyUnicode_EncodeDecimal(Py_UNICODE *s, int PyUnicode_EncodeDecimal(Py_UNICODE *s,
...@@ -3469,7 +3477,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, ...@@ -3469,7 +3477,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
Py_UNICODE *uni2; Py_UNICODE *uni2;
Py_UNICODE *collstart; Py_UNICODE *collstart;
Py_UNICODE *collend; Py_UNICODE *collend;
if (Py_UNICODE_ISSPACE(ch)) { if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' '; *output++ = ' ';
++p; ++p;
...@@ -3570,7 +3578,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, ...@@ -3570,7 +3578,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */ /* --- Helpers ------------------------------------------------------------ */
static static
int count(PyUnicodeObject *self, int count(PyUnicodeObject *self,
int start, int start,
int end, int end,
...@@ -3610,7 +3618,7 @@ int PyUnicode_Count(PyObject *str, ...@@ -3610,7 +3618,7 @@ int PyUnicode_Count(PyObject *str,
int end) int end)
{ {
int result; int result;
str = PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (str == NULL) if (str == NULL)
return -1; return -1;
...@@ -3619,17 +3627,17 @@ int PyUnicode_Count(PyObject *str, ...@@ -3619,17 +3627,17 @@ int PyUnicode_Count(PyObject *str,
Py_DECREF(str); Py_DECREF(str);
return -1; return -1;
} }
result = count((PyUnicodeObject *)str, result = count((PyUnicodeObject *)str,
start, end, start, end,
(PyUnicodeObject *)substr); (PyUnicodeObject *)substr);
Py_DECREF(str); Py_DECREF(str);
Py_DECREF(substr); Py_DECREF(substr);
return result; return result;
} }
static static
int findstring(PyUnicodeObject *self, int findstring(PyUnicodeObject *self,
PyUnicodeObject *substring, PyUnicodeObject *substring,
int start, int start,
...@@ -3673,7 +3681,7 @@ int PyUnicode_Find(PyObject *str, ...@@ -3673,7 +3681,7 @@ int PyUnicode_Find(PyObject *str,
int direction) int direction)
{ {
int result; int result;
str = PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (str == NULL) if (str == NULL)
return -2; return -2;
...@@ -3682,7 +3690,7 @@ int PyUnicode_Find(PyObject *str, ...@@ -3682,7 +3690,7 @@ int PyUnicode_Find(PyObject *str,
Py_DECREF(str); Py_DECREF(str);
return -2; return -2;
} }
result = findstring((PyUnicodeObject *)str, result = findstring((PyUnicodeObject *)str,
(PyUnicodeObject *)substr, (PyUnicodeObject *)substr,
start, end, direction); start, end, direction);
...@@ -3691,7 +3699,7 @@ int PyUnicode_Find(PyObject *str, ...@@ -3691,7 +3699,7 @@ int PyUnicode_Find(PyObject *str,
return result; return result;
} }
static static
int tailmatch(PyUnicodeObject *self, int tailmatch(PyUnicodeObject *self,
PyUnicodeObject *substring, PyUnicodeObject *substring,
int start, int start,
...@@ -3735,7 +3743,7 @@ int PyUnicode_Tailmatch(PyObject *str, ...@@ -3735,7 +3743,7 @@ int PyUnicode_Tailmatch(PyObject *str,
int direction) int direction)
{ {
int result; int result;
str = PyUnicode_FromObject(str); str = PyUnicode_FromObject(str);
if (str == NULL) if (str == NULL)
return -1; return -1;
...@@ -3744,7 +3752,7 @@ int PyUnicode_Tailmatch(PyObject *str, ...@@ -3744,7 +3752,7 @@ int PyUnicode_Tailmatch(PyObject *str,
Py_DECREF(substr); Py_DECREF(substr);
return -1; return -1;
} }
result = tailmatch((PyUnicodeObject *)str, result = tailmatch((PyUnicodeObject *)str,
(PyUnicodeObject *)substr, (PyUnicodeObject *)substr,
start, end, direction); start, end, direction);
...@@ -3753,7 +3761,7 @@ int PyUnicode_Tailmatch(PyObject *str, ...@@ -3753,7 +3761,7 @@ int PyUnicode_Tailmatch(PyObject *str,
return result; return result;
} }
static static
const Py_UNICODE *findchar(const Py_UNICODE *s, const Py_UNICODE *findchar(const Py_UNICODE *s,
int size, int size,
Py_UNICODE ch) Py_UNICODE ch)
...@@ -3772,7 +3780,7 @@ const Py_UNICODE *findchar(const Py_UNICODE *s, ...@@ -3772,7 +3780,7 @@ const Py_UNICODE *findchar(const Py_UNICODE *s,
/* Apply fixfct filter to the Unicode object self and return a /* Apply fixfct filter to the Unicode object self and return a
reference to the modified object */ reference to the modified object */
static static
PyObject *fixup(PyUnicodeObject *self, PyObject *fixup(PyUnicodeObject *self,
int (*fixfct)(PyUnicodeObject *s)) int (*fixfct)(PyUnicodeObject *s))
{ {
...@@ -3796,16 +3804,16 @@ PyObject *fixup(PyUnicodeObject *self, ...@@ -3796,16 +3804,16 @@ PyObject *fixup(PyUnicodeObject *self,
return (PyObject*) u; return (PyObject*) u;
} }
static static
int fixupper(PyUnicodeObject *self) int fixupper(PyUnicodeObject *self)
{ {
int len = self->length; int len = self->length;
Py_UNICODE *s = self->str; Py_UNICODE *s = self->str;
int status = 0; int status = 0;
while (len-- > 0) { while (len-- > 0) {
register Py_UNICODE ch; register Py_UNICODE ch;
ch = Py_UNICODE_TOUPPER(*s); ch = Py_UNICODE_TOUPPER(*s);
if (ch != *s) { if (ch != *s) {
status = 1; status = 1;
...@@ -3817,16 +3825,16 @@ int fixupper(PyUnicodeObject *self) ...@@ -3817,16 +3825,16 @@ int fixupper(PyUnicodeObject *self)
return status; return status;
} }
static static
int fixlower(PyUnicodeObject *self) int fixlower(PyUnicodeObject *self)
{ {
int len = self->length; int len = self->length;
Py_UNICODE *s = self->str; Py_UNICODE *s = self->str;
int status = 0; int status = 0;
while (len-- > 0) { while (len-- > 0) {
register Py_UNICODE ch; register Py_UNICODE ch;
ch = Py_UNICODE_TOLOWER(*s); ch = Py_UNICODE_TOLOWER(*s);
if (ch != *s) { if (ch != *s) {
status = 1; status = 1;
...@@ -3838,13 +3846,13 @@ int fixlower(PyUnicodeObject *self) ...@@ -3838,13 +3846,13 @@ int fixlower(PyUnicodeObject *self)
return status; return status;
} }
static static
int fixswapcase(PyUnicodeObject *self) int fixswapcase(PyUnicodeObject *self)
{ {
int len = self->length; int len = self->length;
Py_UNICODE *s = self->str; Py_UNICODE *s = self->str;
int status = 0; int status = 0;
while (len-- > 0) { while (len-- > 0) {
if (Py_UNICODE_ISUPPER(*s)) { if (Py_UNICODE_ISUPPER(*s)) {
*s = Py_UNICODE_TOLOWER(*s); *s = Py_UNICODE_TOLOWER(*s);
...@@ -3859,13 +3867,13 @@ int fixswapcase(PyUnicodeObject *self) ...@@ -3859,13 +3867,13 @@ int fixswapcase(PyUnicodeObject *self)
return status; return status;
} }
static static
int fixcapitalize(PyUnicodeObject *self) int fixcapitalize(PyUnicodeObject *self)
{ {
int len = self->length; int len = self->length;
Py_UNICODE *s = self->str; Py_UNICODE *s = self->str;
int status = 0; int status = 0;
if (len == 0) if (len == 0)
return 0; return 0;
if (Py_UNICODE_ISLOWER(*s)) { if (Py_UNICODE_ISLOWER(*s)) {
...@@ -3900,19 +3908,19 @@ int fixtitle(PyUnicodeObject *self) ...@@ -3900,19 +3908,19 @@ int fixtitle(PyUnicodeObject *self)
else else
return 0; return 0;
} }
e = p + PyUnicode_GET_SIZE(self); e = p + PyUnicode_GET_SIZE(self);
previous_is_cased = 0; previous_is_cased = 0;
for (; p < e; p++) { for (; p < e; p++) {
register const Py_UNICODE ch = *p; register const Py_UNICODE ch = *p;
if (previous_is_cased) if (previous_is_cased)
*p = Py_UNICODE_TOLOWER(ch); *p = Py_UNICODE_TOLOWER(ch);
else else
*p = Py_UNICODE_TOTITLE(ch); *p = Py_UNICODE_TOTITLE(ch);
if (Py_UNICODE_ISLOWER(ch) || if (Py_UNICODE_ISLOWER(ch) ||
Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISUPPER(ch) ||
Py_UNICODE_ISTITLE(ch)) Py_UNICODE_ISTITLE(ch))
previous_is_cased = 1; previous_is_cased = 1;
else else
...@@ -3949,7 +3957,7 @@ PyObject *PyUnicode_Join(PyObject *separator, ...@@ -3949,7 +3957,7 @@ PyObject *PyUnicode_Join(PyObject *separator,
sep = PyUnicode_AS_UNICODE(separator); sep = PyUnicode_AS_UNICODE(separator);
seplen = PyUnicode_GET_SIZE(separator); seplen = PyUnicode_GET_SIZE(separator);
} }
res = _PyUnicode_New(sz); res = _PyUnicode_New(sz);
if (res == NULL) if (res == NULL)
goto onError; goto onError;
...@@ -4013,9 +4021,9 @@ PyObject *PyUnicode_Join(PyObject *separator, ...@@ -4013,9 +4021,9 @@ PyObject *PyUnicode_Join(PyObject *separator,
return NULL; return NULL;
} }
static static
PyUnicodeObject *pad(PyUnicodeObject *self, PyUnicodeObject *pad(PyUnicodeObject *self,
int left, int left,
int right, int right,
Py_UNICODE fill) Py_UNICODE fill)
{ {
...@@ -4112,7 +4120,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string, ...@@ -4112,7 +4120,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string,
for (i = j = 0; i < len; ) { for (i = j = 0; i < len; ) {
int eol; int eol;
/* Find a line and append it */ /* Find a line and append it */
while (i < len && !Py_UNICODE_ISLINEBREAK(data[i])) while (i < len && !Py_UNICODE_ISLINEBREAK(data[i]))
i++; i++;
...@@ -4144,7 +4152,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string, ...@@ -4144,7 +4152,7 @@ PyObject *PyUnicode_Splitlines(PyObject *string,
return NULL; return NULL;
} }
static static
PyObject *split_char(PyUnicodeObject *self, PyObject *split_char(PyUnicodeObject *self,
PyObject *list, PyObject *list,
Py_UNICODE ch, Py_UNICODE ch,
...@@ -4174,7 +4182,7 @@ PyObject *split_char(PyUnicodeObject *self, ...@@ -4174,7 +4182,7 @@ PyObject *split_char(PyUnicodeObject *self,
return NULL; return NULL;
} }
static static
PyObject *split_substring(PyUnicodeObject *self, PyObject *split_substring(PyUnicodeObject *self,
PyObject *list, PyObject *list,
PyUnicodeObject *substring, PyUnicodeObject *substring,
...@@ -4236,7 +4244,7 @@ PyObject *split(PyUnicodeObject *self, ...@@ -4236,7 +4244,7 @@ PyObject *split(PyUnicodeObject *self,
return split_substring(self,list,substring,maxcount); return split_substring(self,list,substring,maxcount);
} }
static static
PyObject *replace(PyUnicodeObject *self, PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1, PyUnicodeObject *str1,
PyUnicodeObject *str2, PyUnicodeObject *str2,
...@@ -4259,13 +4267,13 @@ PyObject *replace(PyUnicodeObject *self, ...@@ -4259,13 +4267,13 @@ PyObject *replace(PyUnicodeObject *self,
} else { } else {
Py_UNICODE u1 = str1->str[0]; Py_UNICODE u1 = str1->str[0];
Py_UNICODE u2 = str2->str[0]; Py_UNICODE u2 = str2->str[0];
u = (PyUnicodeObject*) PyUnicode_FromUnicode( u = (PyUnicodeObject*) PyUnicode_FromUnicode(
NULL, NULL,
self->length self->length
); );
if (u != NULL) { if (u != NULL) {
Py_UNICODE_COPY(u->str, self->str, Py_UNICODE_COPY(u->str, self->str,
self->length); self->length);
for (i = 0; i < u->length; i++) for (i = 0; i < u->length; i++)
if (u->str[i] == u1) { if (u->str[i] == u1) {
...@@ -4327,7 +4335,7 @@ PyObject *replace(PyUnicodeObject *self, ...@@ -4327,7 +4335,7 @@ PyObject *replace(PyUnicodeObject *self,
} }
} }
} }
return (PyObject *) u; return (PyObject *) u;
} }
...@@ -4433,9 +4441,9 @@ unicode_center(PyUnicodeObject *self, PyObject *args) ...@@ -4433,9 +4441,9 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
static short utf16Fixup[32] = static short utf16Fixup[32] =
{ {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0x2000, -0x800, -0x800, -0x800, -0x800 0, 0, 0, 0x2000, -0x800, -0x800, -0x800, -0x800
}; };
...@@ -4449,9 +4457,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) ...@@ -4449,9 +4457,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
len1 = str1->length; len1 = str1->length;
len2 = str2->length; len2 = str2->length;
while (len1 > 0 && len2 > 0) { while (len1 > 0 && len2 > 0) {
Py_UNICODE c1, c2; Py_UNICODE c1, c2;
c1 = *s1++; c1 = *s1++;
c2 = *s2++; c2 = *s2++;
...@@ -4464,7 +4472,7 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) ...@@ -4464,7 +4472,7 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
if (c1 != c2) if (c1 != c2)
return (c1 < c2) ? -1 : 1; return (c1 < c2) ? -1 : 1;
len1--; len2--; len1--; len2--;
} }
...@@ -4483,9 +4491,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) ...@@ -4483,9 +4491,9 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
len1 = str1->length; len1 = str1->length;
len2 = str2->length; len2 = str2->length;
while (len1 > 0 && len2 > 0) { while (len1 > 0 && len2 > 0) {
Py_UNICODE c1, c2; Py_UNICODE c1, c2;
c1 = *s1++; c1 = *s1++;
c2 = *s2++; c2 = *s2++;
...@@ -4651,7 +4659,7 @@ unicode_count(PyUnicodeObject *self, PyObject *args) ...@@ -4651,7 +4659,7 @@ unicode_count(PyUnicodeObject *self, PyObject *args)
(PyObject *)substring); (PyObject *)substring);
if (substring == NULL) if (substring == NULL)
return NULL; return NULL;
if (start < 0) if (start < 0)
start += self->length; start += self->length;
if (start < 0) if (start < 0)
...@@ -4836,7 +4844,7 @@ unicode_index(PyUnicodeObject *self, PyObject *args) ...@@ -4836,7 +4844,7 @@ unicode_index(PyUnicodeObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring, if (!PyArg_ParseTuple(args, "O|O&O&:index", &substring,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL; return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject( substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring); (PyObject *)substring);
if (substring == NULL) if (substring == NULL)
...@@ -4877,7 +4885,7 @@ unicode_islower(PyUnicodeObject *self) ...@@ -4877,7 +4885,7 @@ unicode_islower(PyUnicodeObject *self)
cased = 0; cased = 0;
for (; p < e; p++) { for (; p < e; p++) {
register const Py_UNICODE ch = *p; register const Py_UNICODE ch = *p;
if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
return PyBool_FromLong(0); return PyBool_FromLong(0);
else if (!cased && Py_UNICODE_ISLOWER(ch)) else if (!cased && Py_UNICODE_ISLOWER(ch))
...@@ -4911,7 +4919,7 @@ unicode_isupper(PyUnicodeObject *self) ...@@ -4911,7 +4919,7 @@ unicode_isupper(PyUnicodeObject *self)
cased = 0; cased = 0;
for (; p < e; p++) { for (; p < e; p++) {
register const Py_UNICODE ch = *p; register const Py_UNICODE ch = *p;
if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch)) if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
return PyBool_FromLong(0); return PyBool_FromLong(0);
else if (!cased && Py_UNICODE_ISUPPER(ch)) else if (!cased && Py_UNICODE_ISUPPER(ch))
...@@ -4948,7 +4956,7 @@ unicode_istitle(PyUnicodeObject *self) ...@@ -4948,7 +4956,7 @@ unicode_istitle(PyUnicodeObject *self)
previous_is_cased = 0; previous_is_cased = 0;
for (; p < e; p++) { for (; p < e; p++) {
register const Py_UNICODE ch = *p; register const Py_UNICODE ch = *p;
if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) { if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
if (previous_is_cased) if (previous_is_cased)
return PyBool_FromLong(0); return PyBool_FromLong(0);
...@@ -5428,9 +5436,9 @@ PyObject *PyUnicode_Replace(PyObject *obj, ...@@ -5428,9 +5436,9 @@ PyObject *PyUnicode_Replace(PyObject *obj,
Py_DECREF(str1); Py_DECREF(str1);
return NULL; return NULL;
} }
result = replace((PyUnicodeObject *)self, result = replace((PyUnicodeObject *)self,
(PyUnicodeObject *)str1, (PyUnicodeObject *)str1,
(PyUnicodeObject *)str2, (PyUnicodeObject *)str2,
maxcount); maxcount);
Py_DECREF(self); Py_DECREF(self);
Py_DECREF(str1); Py_DECREF(str1);
...@@ -5589,7 +5597,7 @@ PyObject *PyUnicode_Split(PyObject *s, ...@@ -5589,7 +5597,7 @@ PyObject *PyUnicode_Split(PyObject *s,
int maxsplit) int maxsplit)
{ {
PyObject *result; PyObject *result;
s = PyUnicode_FromObject(s); s = PyUnicode_FromObject(s);
if (s == NULL) if (s == NULL)
return NULL; return NULL;
...@@ -5681,9 +5689,9 @@ are deleted."); ...@@ -5681,9 +5689,9 @@ are deleted.");
static PyObject* static PyObject*
unicode_translate(PyUnicodeObject *self, PyObject *table) unicode_translate(PyUnicodeObject *self, PyObject *table)
{ {
return PyUnicode_TranslateCharmap(self->str, return PyUnicode_TranslateCharmap(self->str,
self->length, self->length,
table, table,
"ignore"); "ignore");
} }
...@@ -5938,7 +5946,7 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item) ...@@ -5938,7 +5946,7 @@ unicode_subscript(PyUnicodeObject* self, PyObject* item)
for (cur = start, i = 0; i < slicelength; cur += step, i++) { for (cur = start, i = 0; i < slicelength; cur += step, i++) {
result_buf[i] = source_buf[cur]; result_buf[i] = source_buf[cur];
} }
result = PyUnicode_FromUnicode(result_buf, slicelength); result = PyUnicode_FromUnicode(result_buf, slicelength);
PyMem_FREE(result_buf); PyMem_FREE(result_buf);
return result; return result;
...@@ -5993,7 +6001,7 @@ unicode_buffer_getcharbuf(PyUnicodeObject *self, ...@@ -5993,7 +6001,7 @@ unicode_buffer_getcharbuf(PyUnicodeObject *self,
const void **ptr) const void **ptr)
{ {
PyObject *str; PyObject *str;
if (index != 0) { if (index != 0) {
PyErr_SetString(PyExc_SystemError, PyErr_SetString(PyExc_SystemError,
"accessing non-existent unicode segment"); "accessing non-existent unicode segment");
...@@ -6066,7 +6074,7 @@ formatfloat(Py_UNICODE *buf, ...@@ -6066,7 +6074,7 @@ formatfloat(Py_UNICODE *buf,
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
char fmt[20]; char fmt[20];
double x; double x;
x = PyFloat_AsDouble(v); x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) if (x == -1.0 && PyErr_Occurred())
return -1; return -1;
...@@ -6087,7 +6095,7 @@ formatfloat(Py_UNICODE *buf, ...@@ -6087,7 +6095,7 @@ formatfloat(Py_UNICODE *buf,
len = 1 + 50 + 1 + prec = 52 + prec len = 1 + 50 + 1 + prec = 52 + prec
If prec=0 the effective precision is 1 (the leading digit is If prec=0 the effective precision is 1 (the leading digit is
always given), therefore increase the length by one. always given), therefore increase the length by one.
*/ */
if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) || if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
...@@ -6160,7 +6168,7 @@ formatint(Py_UNICODE *buf, ...@@ -6160,7 +6168,7 @@ formatint(Py_UNICODE *buf,
if ((flags & F_ALT) && if ((flags & F_ALT) &&
(type == 'x' || type == 'X')) { (type == 'x' || type == 'X')) {
/* When converting under %#x or %#X, there are a number /* When converting under %#x or %#X, there are a number
* of issues that cause pain: * of issues that cause pain:
* - when 0 is being converted, the C standard leaves off * - when 0 is being converted, the C standard leaves off
* the '0x' or '0X', which is inconsistent with other * the '0x' or '0X', which is inconsistent with other
...@@ -6170,9 +6178,9 @@ formatint(Py_UNICODE *buf, ...@@ -6170,9 +6178,9 @@ formatint(Py_UNICODE *buf,
* convert 0 with the '0x' or '0X' * convert 0 with the '0x' or '0X'
* (Metrowerks, Compaq Tru64) * (Metrowerks, Compaq Tru64)
* - there are platforms that give '0x' when converting * - there are platforms that give '0x' when converting
* under %#X, but convert 0 in accordance with the * under %#X, but convert 0 in accordance with the
* standard (OS/2 EMX) * standard (OS/2 EMX)
* *
* We can achieve the desired consistency by inserting our * We can achieve the desired consistency by inserting our
* own '0x' or '0X' prefix, and substituting %x/%X in place * own '0x' or '0X' prefix, and substituting %x/%X in place
* of %#x/%#X. * of %#x/%#X.
...@@ -6180,12 +6188,12 @@ formatint(Py_UNICODE *buf, ...@@ -6180,12 +6188,12 @@ formatint(Py_UNICODE *buf,
* Note that this is the same approach as used in * Note that this is the same approach as used in
* formatint() in stringobject.c * formatint() in stringobject.c
*/ */
PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c", PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
type, prec, type); type, prec, type);
} }
else { else {
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c", PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
(flags&F_ALT) ? "#" : "", (flags&F_ALT) ? "#" : "",
prec, type); prec, type);
} }
return usprintf(buf, fmt, x); return usprintf(buf, fmt, x);
...@@ -6204,7 +6212,7 @@ formatchar(Py_UNICODE *buf, ...@@ -6204,7 +6212,7 @@ formatchar(Py_UNICODE *buf,
} }
else if (PyString_Check(v)) { else if (PyString_Check(v)) {
if (PyString_GET_SIZE(v) != 1) if (PyString_GET_SIZE(v) != 1)
goto onError; goto onError;
buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0]; buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
} }
...@@ -6260,7 +6268,7 @@ PyObject *PyUnicode_Format(PyObject *format, ...@@ -6260,7 +6268,7 @@ PyObject *PyUnicode_Format(PyObject *format,
PyUnicodeObject *result = NULL; PyUnicodeObject *result = NULL;
PyObject *dict = NULL; PyObject *dict = NULL;
PyObject *uformat; PyObject *uformat;
if (format == NULL || args == NULL) { if (format == NULL || args == NULL) {
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return NULL; return NULL;
...@@ -6324,7 +6332,7 @@ PyObject *PyUnicode_Format(PyObject *format, ...@@ -6324,7 +6332,7 @@ PyObject *PyUnicode_Format(PyObject *format,
if (dict == NULL) { if (dict == NULL) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
"format requires a mapping"); "format requires a mapping");
goto onError; goto onError;
} }
++fmt; ++fmt;
...@@ -6570,7 +6578,7 @@ PyObject *PyUnicode_Format(PyObject *format, ...@@ -6570,7 +6578,7 @@ PyObject *PyUnicode_Format(PyObject *format,
PyErr_Format(PyExc_ValueError, PyErr_Format(PyExc_ValueError,
"unsupported format character '%c' (0x%x) " "unsupported format character '%c' (0x%x) "
"at index %i", "at index %i",
(31<=c && c<=126) ? (char)c : '?', (31<=c && c<=126) ? (char)c : '?',
(int)c, (int)c,
(int)(fmt -1 - PyUnicode_AS_UNICODE(uformat))); (int)(fmt -1 - PyUnicode_AS_UNICODE(uformat)));
goto onError; goto onError;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment