Commit e9ddfbb4 authored by Hye-Shik Chang

SF #989185: Drop unicode.iswide() and unicode.width() and add

unicodedata.east_asian_width().  You can still implement your own
simple width() function using it like this:
    def width(u):
        """Return the number of fixed-width display columns taken by *u*.

        The string is first normalized to NFC.  Every resulting character
        occupies one column, and characters whose East Asian Width class
        is Wide ('W') or Fullwidth ('F') occupy one additional column.
        """
        chars = unicodedata.normalize('NFC', u)
        # Collect only the double-width characters; each adds one extra column
        # on top of the one column every character already contributes.
        wide = [c for c in chars
                if unicodedata.east_asian_width(c) in ('W', 'F')]
        return len(chars) + len(wide)
parent b5047fd0
...@@ -894,11 +894,6 @@ functions depending on the Python configuration. ...@@ -894,11 +894,6 @@ functions depending on the Python configuration.
character. character.
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{int}{Py_UNICODE_ISWIDE}{Py_UNICODE ch}
Returns 1/0 depending on whether \var{ch} is a wide or full-width
character.
\end{cfuncdesc}
These APIs can be used for fast direct character conversions: These APIs can be used for fast direct character conversions:
\begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOLOWER}{Py_UNICODE ch} \begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOLOWER}{Py_UNICODE ch}
...@@ -957,10 +952,6 @@ use these APIs: ...@@ -957,10 +952,6 @@ use these APIs:
Return the length of the Unicode object. Return the length of the Unicode object.
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{int}{PyUnicode_GetWidth}{PyObject *unicode}
Return the fixed-width representation length of the Unicode object.
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyUnicode_FromEncodedObject}{PyObject *obj, \begin{cfuncdesc}{PyObject*}{PyUnicode_FromEncodedObject}{PyObject *obj,
const char *encoding, const char *encoding,
const char *errors} const char *errors}
......
...@@ -664,12 +664,6 @@ there is at least one cased character, false otherwise. ...@@ -664,12 +664,6 @@ there is at least one cased character, false otherwise.
For 8-bit strings, this method is locale-dependent. For 8-bit strings, this method is locale-dependent.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}[string]{iswide}{}
Return true if all characters in the string are wide or full width and
there is at least one wide or full width character, false otherwise.
This method is supported by unicode type only.
\end{methoddesc}
\begin{methoddesc}[string]{join}{seq} \begin{methoddesc}[string]{join}{seq}
Return a string which is the concatenation of the strings in the Return a string which is the concatenation of the strings in the
sequence \var{seq}. The separator between elements is the string sequence \var{seq}. The separator between elements is the string
...@@ -810,11 +804,6 @@ Return a copy of the string converted to uppercase. ...@@ -810,11 +804,6 @@ Return a copy of the string converted to uppercase.
For 8-bit strings, this method is locale-dependent. For 8-bit strings, this method is locale-dependent.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}[string]{width}{}
Return length of fixed-width representation of the string. This method
is supported by unicode type only.
\end{methoddesc}
\begin{methoddesc}[string]{zfill}{width} \begin{methoddesc}[string]{zfill}{width}
Return the numeric string left filled with zeros in a string Return the numeric string left filled with zeros in a string
of length \var{width}. The original string is returned if of length \var{width}. The original string is returned if
......
...@@ -71,6 +71,11 @@ defines the following functions: ...@@ -71,6 +71,11 @@ defines the following functions:
class is defined. class is defined.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{east_asian_width}{unichr}
Returns the East Asian width assigned to the Unicode character
\var{unichr} as a string.
\end{funcdesc}
\begin{funcdesc}{mirrored}{unichr} \begin{funcdesc}{mirrored}{unichr}
Returns the mirrored property of assigned to the Unicode character Returns the mirrored property of assigned to the Unicode character
\var{unichr} as integer. Returns \code{1} if the character has been \var{unichr} as integer. Returns \code{1} if the character has been
...@@ -123,4 +128,4 @@ In addition, the module exposes the following constant: ...@@ -123,4 +128,4 @@ In addition, the module exposes the following constant:
The version of the Unicode database used in this module. The version of the Unicode database used in this module.
\versionadded{2.3} \versionadded{2.3}
\end{datadesc} \end{datadesc}
\ No newline at end of file
...@@ -181,7 +181,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -181,7 +181,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
# define PyUnicode_GetWidth PyUnicodeUCS2_GetWidth
# define PyUnicode_Join PyUnicodeUCS2_Join # define PyUnicode_Join PyUnicodeUCS2_Join
# define PyUnicode_Replace PyUnicodeUCS2_Replace # define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize # define PyUnicode_Resize PyUnicodeUCS2_Resize
...@@ -201,7 +200,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -201,7 +200,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
# define _PyUnicode_IsWide _PyUnicodeUCS2_IsWide
# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
...@@ -256,7 +254,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -256,7 +254,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
# define PyUnicode_GetWidth PyUnicodeUCS4_GetWidth
# define PyUnicode_Join PyUnicodeUCS4_Join # define PyUnicode_Join PyUnicodeUCS4_Join
# define PyUnicode_Replace PyUnicodeUCS4_Replace # define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize # define PyUnicode_Resize PyUnicodeUCS4_Resize
...@@ -275,7 +272,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -275,7 +272,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
# define _PyUnicode_IsWide _PyUnicodeUCS4_IsWide
# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
...@@ -321,8 +317,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -321,8 +317,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
#define Py_UNICODE_ISALPHA(ch) iswalpha(ch) #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
#else #else
#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch) #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
...@@ -346,8 +340,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; ...@@ -346,8 +340,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
#endif #endif
#define Py_UNICODE_ISALNUM(ch) \ #define Py_UNICODE_ISALNUM(ch) \
...@@ -440,12 +432,6 @@ PyAPI_FUNC(int) PyUnicode_GetSize( ...@@ -440,12 +432,6 @@ PyAPI_FUNC(int) PyUnicode_GetSize(
PyObject *unicode /* Unicode object */ PyObject *unicode /* Unicode object */
); );
/* Get the fixed-width representation length of the Unicode object */
PyAPI_FUNC(int) PyUnicode_GetWidth(
PyObject *unicode /* Unicode object */
);
/* Get the maximum ordinal for a Unicode character. */ /* Get the maximum ordinal for a Unicode character. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
...@@ -1176,10 +1162,6 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha( ...@@ -1176,10 +1162,6 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
Py_UNICODE ch /* Unicode character */ Py_UNICODE ch /* Unicode character */
); );
PyAPI_FUNC(int) _PyUnicode_IsWide(
Py_UNICODE ch /* Unicode character */
);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -695,28 +695,3 @@ class MixinStrUserStringTest: ...@@ -695,28 +695,3 @@ class MixinStrUserStringTest:
self.checkraises(TypeError, 'xyz', 'decode', 42) self.checkraises(TypeError, 'xyz', 'decode', 42)
self.checkraises(TypeError, 'xyz', 'encode', 42) self.checkraises(TypeError, 'xyz', 'encode', 42)
class MixinUnicodeUserStringTest:
# Additional tests that only work with
# unicode compatible object, i.e. unicode and UserString
# Each check passes (expected result, test string, method name) to the
# checkequal() helper supplied by the concrete test class.
# iswide(): the expectations below are True only for a non-empty string
# in which every character is Wide or Full width.
def test_iswide(self):
self.checkequal(False, u'', 'iswide')  # empty string is never wide
self.checkequal(False, u'\x1f', 'iswide') # Neutral
self.checkequal(False, u'\x20', 'iswide') # Narrow
self.checkequal(True, u'\u2329', 'iswide') # Wide
self.checkequal(False, u'\uff64', 'iswide') # Half
self.checkequal(True, u'\u3000', 'iswide') # Full
self.checkequal(False, u'\u2460', 'iswide') # Ambiguous
self.checkequal(True, u'\ud55c\uae00', 'iswide')  # every character wide
self.checkequal(False, u'\ud55c\u2606\uae00', 'iswide')  # u'\u2606' in the middle is not wide
# width(): Wide/Full width characters count as two columns, every other
# character as one.
def test_width(self):
self.checkequal(0, u'', 'width')  # empty string has zero width
self.checkequal(4, u'abcd', 'width')  # four single-column characters
self.checkequal(2, u'\u0187\u01c9', 'width')  # non-ASCII but still one column each
self.checkequal(3, u'\u2460\u2329', 'width')  # Ambiguous (1) + Wide (2)
self.checkequal(3, u'\u2329\u2460', 'width')  # same characters, order does not matter
self.checkequal(4, u'\ud55c\uae00', 'width')  # two wide characters
self.checkequal(5, u'\ud55c\u2606\uae00', 'width')  # 2 + 1 + 2
...@@ -11,8 +11,7 @@ from test import test_support, string_tests ...@@ -11,8 +11,7 @@ from test import test_support, string_tests
class UnicodeTest( class UnicodeTest(
string_tests.CommonTest, string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest, string_tests.MixinStrUnicodeUserStringTest
string_tests.MixinUnicodeUserStringTest
): ):
type2test = unicode type2test = unicode
......
...@@ -174,6 +174,17 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -174,6 +174,17 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
# The rest can be found in test_normalization.py # The rest can be found in test_normalization.py
# which requires an external file. # which requires an external file.
def test_east_asian_width(self):
# Checks unicodedata.east_asian_width(): argument validation first, then
# one sample character for each of the six width class codes.
eaw = self.db.east_asian_width
# Anything other than a unicode string of length exactly one is rejected.
self.assertRaises(TypeError, eaw, 'a')  # plain str, not unicode
self.assertRaises(TypeError, eaw, u'')  # empty unicode string
self.assertRaises(TypeError, eaw, u'ra')  # more than one character
self.assertEqual(eaw(u'\x1e'), 'N')  # Neutral
self.assertEqual(eaw(u'\x20'), 'Na')  # Narrow
self.assertEqual(eaw(u'\uC894'), 'W')  # Wide
self.assertEqual(eaw(u'\uFF66'), 'H')  # Halfwidth
self.assertEqual(eaw(u'\uFF1F'), 'F')  # Fullwidth
self.assertEqual(eaw(u'\u2010'), 'A')  # Ambiguous
class UnicodeMiscTest(UnicodeDatabaseTest): class UnicodeMiscTest(UnicodeDatabaseTest):
......
...@@ -11,8 +11,7 @@ class UserStringTest( ...@@ -11,8 +11,7 @@ class UserStringTest(
string_tests.CommonTest, string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest, string_tests.MixinStrUnicodeUserStringTest,
string_tests.MixinStrStringUserStringTest, string_tests.MixinStrStringUserStringTest,
string_tests.MixinStrUserStringTest, string_tests.MixinStrUserStringTest
string_tests.MixinUnicodeUserStringTest
): ):
type2test = UserString type2test = UserString
......
...@@ -67,6 +67,9 @@ Core and builtins ...@@ -67,6 +67,9 @@ Core and builtins
- Added a workaround for proper string operations in BSDs. str.split - Added a workaround for proper string operations in BSDs. str.split
and str.is* methods can now work correctly with UTF-8 locales. and str.is* methods can now work correctly with UTF-8 locales.
- unicode.iswide() and unicode.width() are dropped and the East Asian
Width support is moved to the unicodedata extension module.
Extension modules Extension modules
----------------- -----------------
......
...@@ -24,6 +24,8 @@ typedef struct { ...@@ -24,6 +24,8 @@ typedef struct {
const unsigned char bidirectional; /* index into const unsigned char bidirectional; /* index into
_PyUnicode_BidirectionalNames */ _PyUnicode_BidirectionalNames */
const unsigned char mirrored; /* true if mirrored in bidir mode */ const unsigned char mirrored; /* true if mirrored in bidir mode */
const unsigned char east_asian_width; /* index into
_PyUnicode_EastAsianWidth */
} _PyUnicode_DatabaseRecord; } _PyUnicode_DatabaseRecord;
/* data file generated by Tools/unicode/makeunicodedata.py */ /* data file generated by Tools/unicode/makeunicodedata.py */
...@@ -204,6 +206,24 @@ unicodedata_mirrored(PyObject *self, PyObject *args) ...@@ -204,6 +206,24 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
return PyInt_FromLong((int) _getrecord(v)->mirrored); return PyInt_FromLong((int) _getrecord(v)->mirrored);
} }
/* unicodedata.east_asian_width(unichr) -> string
 *
 * Look up the database record of a single Unicode character and return the
 * name of its East Asian Width class.  The argument must be a unicode
 * object of length one; otherwise TypeError is set and NULL is returned.
 */
static PyObject *
unicodedata_east_asian_width(PyObject *self, PyObject *args)
{
    PyUnicodeObject *ch;
    const char *name;

    /* "O!" makes PyArg_ParseTuple reject anything that is not unicode. */
    if (!PyArg_ParseTuple(args, "O!:east_asian_width",
                          &PyUnicode_Type, &ch))
        return NULL;

    if (PyUnicode_GET_SIZE(ch) == 1) {
        /* The record stores an index into the table of class names. */
        name = _PyUnicode_EastAsianWidthNames[
            (int) _getrecord(ch)->east_asian_width];
        return PyString_FromString(name);
    }

    PyErr_SetString(PyExc_TypeError,
                    "need a single Unicode character as parameter");
    return NULL;
}
static PyObject * static PyObject *
unicodedata_decomposition(PyObject *self, PyObject *args) unicodedata_decomposition(PyObject *self, PyObject *args)
{ {
...@@ -871,6 +891,7 @@ static PyMethodDef unicodedata_functions[] = { ...@@ -871,6 +891,7 @@ static PyMethodDef unicodedata_functions[] = {
{"bidirectional", unicodedata_bidirectional, METH_VARARGS}, {"bidirectional", unicodedata_bidirectional, METH_VARARGS},
{"combining", unicodedata_combining, METH_VARARGS}, {"combining", unicodedata_combining, METH_VARARGS},
{"mirrored", unicodedata_mirrored, METH_VARARGS}, {"mirrored", unicodedata_mirrored, METH_VARARGS},
{"east_asian_width", unicodedata_east_asian_width, METH_VARARGS},
{"decomposition",unicodedata_decomposition, METH_VARARGS}, {"decomposition",unicodedata_decomposition, METH_VARARGS},
{"name", unicodedata_name, METH_VARARGS}, {"name", unicodedata_name, METH_VARARGS},
{"lookup", unicodedata_lookup, METH_VARARGS}, {"lookup", unicodedata_lookup, METH_VARARGS},
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#define SPACE_MASK 0x20 #define SPACE_MASK 0x20
#define TITLE_MASK 0x40 #define TITLE_MASK 0x40
#define UPPER_MASK 0x80 #define UPPER_MASK 0x80
#define WIDE_MASK 0x100
typedef struct { typedef struct {
const Py_UNICODE upper; const Py_UNICODE upper;
...@@ -323,15 +322,6 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch) ...@@ -323,15 +322,6 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
return 1; return 1;
} }
/* Returns 1 if the type record of ch has the WIDE_MASK flag set (Unicode
   characters having Full or Wide width), 0 otherwise. */

int _PyUnicode_IsWide(Py_UNICODE ch)
{
    if (gettyperecord(ch)->flags & WIDE_MASK)
        return 1;
    return 0;
}
#ifndef WANT_WCTYPE_FUNCTIONS #ifndef WANT_WCTYPE_FUNCTIONS
/* Returns 1 for Unicode characters having the bidirectional type /* Returns 1 for Unicode characters having the bidirectional type
......
...@@ -702,27 +702,6 @@ int PyUnicode_GetSize(PyObject *unicode) ...@@ -702,27 +702,6 @@ int PyUnicode_GetSize(PyObject *unicode)
return -1; return -1;
} }
/* Return the fixed-width representation length of a unicode object: each
   character for which Py_UNICODE_ISWIDE() is true counts as two columns,
   every other character as one.  Sets an exception via PyErr_BadArgument()
   and returns -1 if the argument is not a unicode object. */
int PyUnicode_GetWidth(PyObject *unicode)
{
    const Py_UNICODE *chars;
    int i, length, columns;

    if (!PyUnicode_Check(unicode)) {
        PyErr_BadArgument();
        return -1;
    }

    chars = PyUnicode_AS_UNICODE(unicode);
    length = PyUnicode_GET_SIZE(unicode);
    columns = 0;
    for (i = 0; i < length; i++)
        columns += Py_UNICODE_ISWIDE(chars[i]) ? 2 : 1;
    return columns;
}
const char *PyUnicode_GetDefaultEncoding(void) const char *PyUnicode_GetDefaultEncoding(void)
{ {
return unicode_default_encoding; return unicode_default_encoding;
...@@ -5436,35 +5415,6 @@ unicode_isnumeric(PyUnicodeObject *self) ...@@ -5436,35 +5415,6 @@ unicode_isnumeric(PyUnicodeObject *self)
return PyBool_FromLong(1); return PyBool_FromLong(1);
} }
PyDoc_STRVAR(iswide__doc__,
"S.iswide() -> bool\n\
\n\
Return True if all characters in S are wide width\n\
and there is at least one character in S, False otherwise.");
/* Implementation of S.iswide(): returns True if S is non-empty and every
   character satisfies Py_UNICODE_ISWIDE(), False otherwise. */
static PyObject*
unicode_iswide(PyUnicodeObject *self)
{
    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
    register const Py_UNICODE *e;

    /* Shortcut for single character strings */
    if (PyUnicode_GET_SIZE(self) == 1 &&
        Py_UNICODE_ISWIDE(*p))
        Py_RETURN_TRUE;

    /* Special case for empty strings.  self is a unicode object, so its
       length must be read with the unicode accessor, not PyString_GET_SIZE. */
    if (PyUnicode_GET_SIZE(self) == 0)
        Py_RETURN_FALSE;

    /* A single non-wide character makes the whole string non-wide. */
    e = p + PyUnicode_GET_SIZE(self);
    for (; p < e; p++) {
        if (!Py_UNICODE_ISWIDE(*p))
            Py_RETURN_FALSE;
    }
    Py_RETURN_TRUE;
}
PyDoc_STRVAR(join__doc__, PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> unicode\n\ "S.join(sequence) -> unicode\n\
\n\ \n\
...@@ -6076,21 +6026,6 @@ unicode_upper(PyUnicodeObject *self) ...@@ -6076,21 +6026,6 @@ unicode_upper(PyUnicodeObject *self)
return fixup(self, fixupper); return fixup(self, fixupper);
} }
PyDoc_STRVAR(width__doc__,
"S.width() -> int\n\
\n\
Return a fixed-width representation length of S.");

/* Implementation of S.width(): wraps PyUnicode_GetWidth() and boxes the
   result as a Python int. */
static PyObject*
unicode_width(PyObject *self)
{
    int width = PyUnicode_GetWidth(self);

    /* PyUnicode_GetWidth() returns -1 with an exception set on failure;
       propagate it to the caller. */
    if (width == -1)
        return NULL;
    return PyInt_FromLong((long)width);
}
PyDoc_STRVAR(zfill__doc__, PyDoc_STRVAR(zfill__doc__,
"S.zfill(width) -> unicode\n\ "S.zfill(width) -> unicode\n\
\n\ \n\
...@@ -6255,8 +6190,6 @@ static PyMethodDef unicode_methods[] = { ...@@ -6255,8 +6190,6 @@ static PyMethodDef unicode_methods[] = {
{"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__}, {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
{"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
{"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
{"iswide", (PyCFunction) unicode_iswide, METH_NOARGS, iswide__doc__},
{"width", (PyCFunction) unicode_width, METH_NOARGS, width__doc__},
{"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
#if 0 #if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
......
This diff is collapsed.
...@@ -43,6 +43,8 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO", ...@@ -43,6 +43,8 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS", "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
"ON" ] "ON" ]
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
# note: should match definitions in Objects/unicodectype.c # note: should match definitions in Objects/unicodectype.c
ALPHA_MASK = 0x01 ALPHA_MASK = 0x01
DECIMAL_MASK = 0x02 DECIMAL_MASK = 0x02
...@@ -52,7 +54,6 @@ LINEBREAK_MASK = 0x10 ...@@ -52,7 +54,6 @@ LINEBREAK_MASK = 0x10
SPACE_MASK = 0x20 SPACE_MASK = 0x20
TITLE_MASK = 0x40 TITLE_MASK = 0x40
UPPER_MASK = 0x80 UPPER_MASK = 0x80
WIDE_MASK = 0x100
def maketables(trace=0): def maketables(trace=0):
...@@ -72,7 +73,7 @@ def maketables(trace=0): ...@@ -72,7 +73,7 @@ def maketables(trace=0):
def makeunicodedata(unicode, trace): def makeunicodedata(unicode, trace):
dummy = (0, 0, 0, 0) dummy = (0, 0, 0, 0, 0)
table = [dummy] table = [dummy]
cache = {0: dummy} cache = {0: dummy}
index = [0] * len(unicode.chars) index = [0] * len(unicode.chars)
...@@ -91,8 +92,9 @@ def makeunicodedata(unicode, trace): ...@@ -91,8 +92,9 @@ def makeunicodedata(unicode, trace):
combining = int(record[3]) combining = int(record[3])
bidirectional = BIDIRECTIONAL_NAMES.index(record[4]) bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
mirrored = record[9] == "Y" mirrored = record[9] == "Y"
eastasianwidth = EASTASIANWIDTH_NAMES.index(record[15])
item = ( item = (
category, combining, bidirectional, mirrored category, combining, bidirectional, mirrored, eastasianwidth
) )
# add entry to index and item tables # add entry to index and item tables
i = cache.get(item) i = cache.get(item)
...@@ -204,7 +206,7 @@ def makeunicodedata(unicode, trace): ...@@ -204,7 +206,7 @@ def makeunicodedata(unicode, trace):
print >>fp, \ print >>fp, \
"const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {" "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
for item in table: for item in table:
print >>fp, " {%d, %d, %d, %d}," % item print >>fp, " {%d, %d, %d, %d, %d}," % item
print >>fp, "};" print >>fp, "};"
print >>fp print >>fp
...@@ -239,6 +241,12 @@ def makeunicodedata(unicode, trace): ...@@ -239,6 +241,12 @@ def makeunicodedata(unicode, trace):
print >>fp, " NULL" print >>fp, " NULL"
print >>fp, "};" print >>fp, "};"
print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {"
for name in EASTASIANWIDTH_NAMES:
print >>fp, " \"%s\"," % name
print >>fp, " NULL"
print >>fp, "};"
print >>fp, "static const char *decomp_prefix[] = {" print >>fp, "static const char *decomp_prefix[] = {"
for name in decomp_prefix: for name in decomp_prefix:
print >>fp, " \"%s\"," % name print >>fp, " \"%s\"," % name
...@@ -334,8 +342,6 @@ def makeunicodetype(unicode, trace): ...@@ -334,8 +342,6 @@ def makeunicodetype(unicode, trace):
if record[7]: if record[7]:
flags |= DIGIT_MASK flags |= DIGIT_MASK
digit = int(record[7]) digit = int(record[7])
if record[15] in ('W', 'F'): # Wide or Full width
flags |= WIDE_MASK
item = ( item = (
upper, lower, title, decimal, digit, flags upper, lower, title, decimal, digit, flags
) )
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment