Commit 8f455858 authored by Fredrik Lundh

Use Py_UNICODE_WIDE instead of USE_UCS4_STORAGE and Py_UNICODE_SIZE tests.
parent 3f8c2e16
...@@ -66,10 +66,11 @@ Copyright (c) Corporation for National Research Initiatives. ...@@ -66,10 +66,11 @@ Copyright (c) Corporation for National Research Initiatives.
#error Must define Py_UNICODE_SIZE #error Must define Py_UNICODE_SIZE
#endif #endif
/* experimental UCS-4 support. enable at your own risk! */ /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
#undef USE_UCS4_STORAGE strings are stored as UCS-2 (with limited support for UTF-16) */
#if Py_UNICODE_SIZE == 4
#define USE_UCS4_STORAGE #if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif #endif
/* Set these flags if the platform has "wchar.h", "wctype.h" and the /* Set these flags if the platform has "wchar.h", "wctype.h" and the
...@@ -81,12 +82,12 @@ Copyright (c) Corporation for National Research Initiatives. ...@@ -81,12 +82,12 @@ Copyright (c) Corporation for National Research Initiatives.
#ifndef PY_UNICODE_TYPE #ifndef PY_UNICODE_TYPE
/* Windows has a usable wchar_t type (unless we're using UCS-4) */ /* Windows has a usable wchar_t type (unless we're using UCS-4) */
# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE) # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
# define HAVE_USABLE_WCHAR_T # define HAVE_USABLE_WCHAR_T
# define PY_UNICODE_TYPE wchar_t # define PY_UNICODE_TYPE wchar_t
# endif # endif
# if defined(USE_UCS4_STORAGE) # if defined(Py_UNICODE_WIDE)
# define PY_UNICODE_TYPE Py_UCS4 # define PY_UNICODE_TYPE Py_UCS4
# endif # endif
......
...@@ -13,8 +13,9 @@ ...@@ -13,8 +13,9 @@
#include "sre_constants.h" #include "sre_constants.h"
/* size of a code word (must be unsigned short or larger) */ /* size of a code word (must be unsigned short or larger, and
#ifdef USE_UCS4_STORAGE large enough to hold a Py_UNICODE character) */
#ifdef Py_UNICODE_WIDE
#define SRE_CODE unsigned long #define SRE_CODE unsigned long
#else #else
#define SRE_CODE unsigned short #define SRE_CODE unsigned short
......
...@@ -68,7 +68,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch) ...@@ -68,7 +68,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
else else
ch += ctype->upper; ch += ctype->upper;
#ifdef USE_UCS4_STORAGE #ifdef Py_UNICODE_WIDE
/* The database assumes that the values wrap around at 0x10000. */ /* The database assumes that the values wrap around at 0x10000. */
if (ch > 0x10000) if (ch > 0x10000)
ch -= 0x10000; ch -= 0x10000;
...@@ -360,7 +360,7 @@ Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch) ...@@ -360,7 +360,7 @@ Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
ch += ctype->upper; ch += ctype->upper;
#ifdef USE_UCS4_STORAGE #ifdef Py_UNICODE_WIDE
/* The database assumes that the values wrap around at 0x10000. */ /* The database assumes that the values wrap around at 0x10000. */
if (ch > 0x10000) if (ch > 0x10000)
ch -= 0x10000; ch -= 0x10000;
...@@ -376,7 +376,7 @@ Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch) ...@@ -376,7 +376,7 @@ Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch)
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
ch += ctype->lower; ch += ctype->lower;
#ifdef USE_UCS4_STORAGE #ifdef Py_UNICODE_WIDE
/* The database assumes that the values wrap around at 0x10000. */ /* The database assumes that the values wrap around at 0x10000. */
if (ch > 0x10000) if (ch > 0x10000)
ch -= 0x10000; ch -= 0x10000;
......
...@@ -106,7 +106,7 @@ static char unicode_default_encoding[100]; ...@@ -106,7 +106,7 @@ static char unicode_default_encoding[100];
Py_UNICODE Py_UNICODE
PyUnicode_GetMax() PyUnicode_GetMax()
{ {
#ifdef USE_UCS4_STORAGE #ifdef Py_UNICODE_WIDE
return 0x10FFFF; return 0x10FFFF;
#else #else
/* This is actually an illegal character, so it should /* This is actually an illegal character, so it should
...@@ -791,7 +791,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s, ...@@ -791,7 +791,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
errmsg = "illegal encoding"; errmsg = "illegal encoding";
goto utf8Error; goto utf8Error;
} }
#if Py_UNICODE_SIZE == 4 #ifdef Py_UNICODE_WIDE
*p++ = (Py_UNICODE)ch; *p++ = (Py_UNICODE)ch;
#else #else
/* compute and append the two surrogates: */ /* compute and append the two surrogates: */
...@@ -1080,7 +1080,7 @@ PyObject *PyUnicode_DecodeUTF16(const char *s, ...@@ -1080,7 +1080,7 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
ch2 = (ch2 >> 8) | (ch2 << 8); ch2 = (ch2 >> 8) | (ch2 << 8);
#endif #endif
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#if Py_UNICODE_SIZE == 2 #ifndef Py_UNICODE_WIDE
/* This is valid data (a UTF-16 surrogate pair), but /* This is valid data (a UTF-16 surrogate pair), but
we are not able to store this information since our we are not able to store this information since our
Py_UNICODE type only has 16 bits... this might Py_UNICODE type only has 16 bits... this might
...@@ -1326,7 +1326,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -1326,7 +1326,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
*p++ = (Py_UNICODE) chr; *p++ = (Py_UNICODE) chr;
else if (chr <= 0x10ffff) { else if (chr <= 0x10ffff) {
/* UCS-4 character. Either store directly, or as surrogate pair. */ /* UCS-4 character. Either store directly, or as surrogate pair. */
#if Py_UNICODE_SIZE == 4 #ifdef Py_UNICODE_WIDE
*p++ = chr; *p++ = chr;
#else #else
chr -= 0x10000L; chr -= 0x10000L;
......
...@@ -325,7 +325,7 @@ builtin_unichr(PyObject *self, PyObject *args) ...@@ -325,7 +325,7 @@ builtin_unichr(PyObject *self, PyObject *args)
return PyUnicode_FromUnicode(s, 1); return PyUnicode_FromUnicode(s, 1);
} }
else { else {
#if Py_UNICODE_SIZE == 2 #ifndef Py_UNICODE_WIDE
/* UCS-4 character. store as two surrogate characters */ /* UCS-4 character. store as two surrogate characters */
x -= 0x10000L; x -= 0x10000L;
s[0] = 0xD800 + (Py_UNICODE) (x >> 10); s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment