Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
7da80597
Commit
7da80597
authored
Oct 10, 2013
by
Benjamin Peterson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
upgrade unicode db to 6.3.0 (closes #19221)
parent
f7102c19
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
17195 additions
and
17153 deletions
+17195
-17153
Doc/library/unicodedata.rst
Doc/library/unicodedata.rst
+4
-4
Lib/test/test_unicodedata.py
Lib/test/test_unicodedata.py
+1
-1
Misc/NEWS
Misc/NEWS
+2
-0
Modules/unicodedata.c
Modules/unicodedata.c
+2
-2
Modules/unicodedata_db.h
Modules/unicodedata_db.h
+792
-784
Modules/unicodename_db.h
Modules/unicodename_db.h
+16386
-16352
Objects/unicodetype_db.h
Objects/unicodetype_db.h
+6
-8
Tools/unicode/makeunicodedata.py
Tools/unicode/makeunicodedata.py
+2
-2
No files found.
Doc/library/unicodedata.rst
View file @
7da80597
...
...
@@ -15,8 +15,8 @@
This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in
this database is compiled from the `UCD version 6.2.0
<http://www.unicode.org/Public/6.2.0/ucd>`_.
this database is compiled from the `UCD version 6.3.0
<http://www.unicode.org/Public/6.3.0/ucd>`_.
The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database"
...
...
@@ -166,6 +166,6 @@ Examples:
.. rubric:: Footnotes
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NameAliases.txt
.. [#] http://www.unicode.org/Public/6.2.0/ucd/NamedSequences.txt
.. [#] http://www.unicode.org/Public/6.3.0/ucd/NamedSequences.txt
Lib/test/test_unicodedata.py
View file @
7da80597
...
...
@@ -21,7 +21,7 @@ errors = 'surrogatepass'
class
UnicodeMethodsTest
(
unittest
.
TestCase
):
# update this, if the database changes
expectedchecksum
=
'
bf7a78f1a532421b5033600102e23a92044dbba9
'
expectedchecksum
=
'
e74e878de71b6e780ffac271785c3cb58f6251f3
'
def
test_method_checksum
(
self
):
h
=
hashlib
.
sha1
()
...
...
Misc/NEWS
View file @
7da80597
...
...
@@ -10,6 +10,8 @@ Projected release date: 2013-10-20
Core and Builtins
-----------------
- Issue #19221: Upgrade Unicode database to version 6.3.0.
- Issue #16742: The result of the C callback PyOS_ReadlineFunctionPointer must
now be a string allocated by PyMem_RawMalloc() or PyMem_RawRealloc() (or NULL
if an error occurred), instead of a string allocated by PyMem_Malloc() or
...
...
Modules/unicodedata.c
View file @
7da80597
...
...
@@ -1322,10 +1322,10 @@ PyDoc_STRVAR(unicodedata_docstring,
"This module provides access to the Unicode Character Database which
\n
\
defines character properties for all Unicode characters. The data in
\n
\
this database is based on the UnicodeData.txt file version
\n
\
6.
0
.0 which is publically available from ftp://ftp.unicode.org/.
\n
\
6.
3
.0 which is publically available from ftp://ftp.unicode.org/.
\n
\
\n
\
The module uses the same names and symbols as defined by the
\n
\
UnicodeData File Format 6.
0
.0 (see
\n
\
UnicodeData File Format 6.
3
.0 (see
\n
\
http://www.unicode.org/reports/tr44/tr44-6.html)."
);
...
...
Modules/unicodedata_db.h
View file @
7da80597
This source diff could not be displayed because it is too large. You can
view the blob
instead.
Modules/unicodename_db.h
View file @
7da80597
This diff is collapsed.
Click to expand it.
Objects/unicodetype_db.h
View file @
7da80597
...
...
@@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
0
,
0
,
0
,
0
,
0
,
55
,
55
,
55
,
5
,
6
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
21
,
21
,
21
,
21
,
21
,
0
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
5
,
0
,
0
,
5
,
5
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
25
,
25
,
25
,
5
,
21
,
0
,
5
,
5
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
96
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
7
,
8
,
...
...
@@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
5
,
5
,
5
,
96
,
5
,
5
,
5
,
5
,
55
,
25
,
0
,
0
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
0
,
0
,
0
,
0
,
0
,
0
,
27
,
27
,
27
,
27
,
27
,
27
,
27
,
27
,
27
,
27
,
0
,
0
,
0
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
25
,
25
,
25
,
2
,
0
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
0
,
0
,
0
,
0
,
0
,
0
,
25
,
25
,
25
,
2
1
,
0
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
0
,
0
,
0
,
0
,
0
,
0
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
96
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
...
...
@@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
132
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
25
,
25
,
18
,
18
,
18
,
0
,
0
,
5
,
5
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
25
,
25
,
18
,
18
,
25
,
0
,
0
,
5
,
5
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
55
,
18
,
25
,
18
,
25
,
...
...
@@ -1915,7 +1915,7 @@ static unsigned short index2[] = {
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
6
,
5
,
5
,
6
,
3
,
3
,
21
,
21
,
21
,
21
,
21
,
2
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
18
,
18
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
18
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
2
,
21
,
21
,
21
,
21
,
21
,
0
,
0
,
0
,
0
,
0
,
21
,
21
,
21
,
21
,
21
,
21
,
245
,
95
,
0
,
0
,
21
,
21
,
21
,
21
,
0
,
21
,
21
,
21
,
21
,
21
,
21
,
21
,
21
,
21
,
21
,
245
,
95
,
0
,
0
,
246
,
247
,
248
,
249
,
250
,
251
,
5
,
5
,
5
,
5
,
5
,
95
,
245
,
26
,
22
,
23
,
246
,
247
,
248
,
249
,
250
,
251
,
5
,
5
,
5
,
5
,
5
,
0
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
95
,
0
,
0
,
0
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
...
...
@@ -2925,9 +2925,6 @@ static unsigned short index2[] = {
double
_PyUnicode_ToNumeric
(
Py_UCS4
ch
)
{
switch
(
ch
)
{
case
0x12456
:
case
0x12457
:
return
(
double
)
-
1
.
0
;
case
0x0F33
:
return
(
double
)
-
1
.
0
/
2
.
0
;
case
0x0030
:
...
...
@@ -3383,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case
0x12435
:
case
0x1244A
:
case
0x12450
:
case
0x12456
:
case
0x12459
:
case
0x1D361
:
case
0x1D7D0
:
...
...
@@ -3539,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
case
0x1243B
:
case
0x1244B
:
case
0x12451
:
case
0x12457
:
case
0x1D362
:
case
0x1D7D1
:
case
0x1D7DB
:
...
...
@@ -4294,7 +4293,6 @@ int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
case
0x0085
:
case
0x00A0
:
case
0x1680
:
case
0x180E
:
case
0x2000
:
case
0x2001
:
case
0x2002
:
...
...
Tools/unicode/makeunicodedata.py
View file @
7da80597
...
...
@@ -37,7 +37,7 @@ SCRIPT = sys.argv[0]
VERSION
=
"3.2"
# The Unicode Database
UNIDATA_VERSION
=
"6.
2
.0"
UNIDATA_VERSION
=
"6.
3
.0"
UNICODE_DATA
=
"UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS
=
"CompositionExclusions%s.txt"
EASTASIAN_WIDTH
=
"EastAsianWidth%s.txt"
...
...
@@ -68,7 +68,7 @@ CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
BIDIRECTIONAL_NAMES
=
[
""
,
"L"
,
"LRE"
,
"LRO"
,
"R"
,
"AL"
,
"RLE"
,
"RLO"
,
"PDF"
,
"EN"
,
"ES"
,
"ET"
,
"AN"
,
"CS"
,
"NSM"
,
"BN"
,
"B"
,
"S"
,
"WS"
,
"ON"
]
"ON"
,
"LRI"
,
"RLI"
,
"FSI"
,
"PDI"
]
EASTASIANWIDTH_NAMES
=
[
"F"
,
"H"
,
"W"
,
"Na"
,
"A"
,
"N"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment