Commit 7da80597 authored by Benjamin Peterson

upgrade unicode db to 6.3.0 (closes #19221)

parent f7102c19
......@@ -15,8 +15,8 @@
This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in
this database is compiled from the `UCD version 6.2.0
<http://www.unicode.org/Public/6.2.0/ucd>`_.
this database is compiled from the `UCD version 6.3.0
<http://www.unicode.org/Public/6.3.0/ucd>`_.
The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database"
......@@ -166,6 +166,6 @@ Examples:
.. rubric:: Footnotes
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NamedSequences.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NamedSequences.txt
......@@ -21,7 +21,7 @@ errors = 'surrogatepass'
class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
expectedchecksum = 'e74e878de71b6e780ffac271785c3cb58f6251f3'
def test_method_checksum(self):
h = hashlib.sha1()
......
......@@ -10,6 +10,8 @@ Projected release date: 2013-10-20
Core and Builtins
-----------------
- Issue #19221: Upgrade Unicode database to version 6.3.0.
- Issue #16742: The result of the C callback PyOS_ReadlineFunctionPointer must
now be a string allocated by PyMem_RawMalloc() or PyMem_RawRealloc() (or NULL
if an error occurred), instead of a string allocated by PyMem_Malloc() or
......
......@@ -1322,10 +1322,10 @@ PyDoc_STRVAR(unicodedata_docstring,
"This module provides access to the Unicode Character Database which\n\
defines character properties for all Unicode characters. The data in\n\
this database is based on the UnicodeData.txt file version\n\
6.0.0 which is publically available from ftp://ftp.unicode.org/.\n\
6.3.0 which is publically available from ftp://ftp.unicode.org/.\n\
\n\
The module uses the same names and symbols as defined by the\n\
UnicodeData File Format 6.0.0 (see\n\
UnicodeData File Format 6.3.0 (see\n\
http://www.unicode.org/reports/tr44/tr44-6.html).");
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0,
0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21,
21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8,
......@@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
......@@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25,
......@@ -1915,7 +1915,7 @@ static unsigned short index2[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21,
21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0,
246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246,
247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95,
95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
......@@ -2925,9 +2925,6 @@ static unsigned short index2[] = {
double _PyUnicode_ToNumeric(Py_UCS4 ch)
{
switch (ch) {
case 0x12456:
case 0x12457:
return (double) -1.0;
case 0x0F33:
return (double) -1.0/2.0;
case 0x0030:
......@@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x12435:
case 0x1244A:
case 0x12450:
case 0x12456:
case 0x12459:
case 0x1D361:
case 0x1D7D0:
......@@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case 0x1243B:
case 0x1244B:
case 0x12451:
case 0x12457:
case 0x1D362:
case 0x1D7D1:
case 0x1D7DB:
......@@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
case 0x0085:
case 0x00A0:
case 0x1680:
case 0x180E:
case 0x2000:
case 0x2001:
case 0x2002:
......
......@@ -37,7 +37,7 @@ SCRIPT = sys.argv[0]
VERSION = "3.2"
# The Unicode Database
UNIDATA_VERSION = "6.2.0"
UNIDATA_VERSION = "6.3.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
......@@ -68,7 +68,7 @@ CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
"ON" ]
"ON", "LRI", "RLI", "FSI", "PDI" ]
EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment