Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
71f660e0
Commit
71f660e0
authored
Feb 20, 2012
by
Benjamin Peterson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update to Unicode 6.1
parent
16fa2a10
Changes
7
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
24458 additions
and
22968 deletions
+24458
-22968
Lib/test/test_unicodedata.py
Lib/test/test_unicodedata.py
+2
-2
Misc/NEWS
Misc/NEWS
+2
-0
Modules/unicodedata.c
Modules/unicodedata.c
+1
-1
Modules/unicodedata_db.h
Modules/unicodedata_db.h
+3687
-3509
Modules/unicodename_db.h
Modules/unicodename_db.h
+19488
-18286
Objects/unicodetype_db.h
Objects/unicodetype_db.h
+1273
-1166
Tools/unicode/makeunicodedata.py
Tools/unicode/makeunicodedata.py
+5
-4
No files found.
Lib/test/test_unicodedata.py
View file @
71f660e0
...
...
@@ -21,7 +21,7 @@ errors = 'surrogatepass'
class
UnicodeMethodsTest
(
unittest
.
TestCase
):
# update this, if the database changes
expectedchecksum
=
'
df0b3ca6785a070b21f837b227dbdbdff3c2e921
'
expectedchecksum
=
'
bf7a78f1a532421b5033600102e23a92044dbba9
'
def
test_method_checksum
(
self
):
h
=
hashlib
.
sha1
()
...
...
@@ -80,7 +80,7 @@ class UnicodeDatabaseTest(unittest.TestCase):
class
UnicodeFunctionsTest
(
UnicodeDatabaseTest
):
# update this, if the database changes
expectedchecksum
=
'
c23dfc0b5eaf3ca2aad32d733de96bb182ccda50
'
expectedchecksum
=
'
17fe2f12b788e4fff5479b469c4404bb6ecf841f
'
def
test_function_checksum
(
self
):
data
=
[]
h
=
hashlib
.
sha1
()
...
...
Misc/NEWS
View file @
71f660e0
...
...
@@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Upgrade Unicode data to Unicode 6.1.
- Issue #14040: Remove rarely used file name suffixes for C extensions
(under POSIX mainly).
...
...
Modules/unicodedata.c
View file @
71f660e0
...
...
@@ -921,7 +921,7 @@ is_unified_ideograph(Py_UCS4 code)
{
return
(
0x3400
<=
code
&&
code
<=
0x4DB5
)
||
/* CJK Ideograph Extension A */
(
0x4E00
<=
code
&&
code
<=
0x9FC
B
)
||
/* CJK Ideograph */
(
0x4E00
<=
code
&&
code
<=
0x9FC
C
)
||
/* CJK Ideograph */
(
0x20000
<=
code
&&
code
<=
0x2A6D6
)
||
/* CJK Ideograph Extension B */
(
0x2A700
<=
code
&&
code
<=
0x2B734
)
||
/* CJK Ideograph Extension C */
(
0x2B740
<=
code
&&
code
<=
0x2B81D
);
/* CJK Ideograph Extension D */
...
...
Modules/unicodedata_db.h
View file @
71f660e0
This diff is collapsed.
Click to expand it.
Modules/unicodename_db.h
View file @
71f660e0
This diff is collapsed.
Click to expand it.
Objects/unicodetype_db.h
View file @
71f660e0
This source diff could not be displayed because it is too large. You can
view the blob
instead.
Tools/unicode/makeunicodedata.py
View file @
71f660e0
...
...
@@ -38,7 +38,7 @@ SCRIPT = sys.argv[0]
VERSION
=
"3.2"
# The Unicode Database
UNIDATA_VERSION
=
"6.
0
.0"
UNIDATA_VERSION
=
"6.
1
.0"
UNICODE_DATA
=
"UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS
=
"CompositionExclusions%s.txt"
EASTASIAN_WIDTH
=
"EastAsianWidth%s.txt"
...
...
@@ -58,7 +58,7 @@ PUA_16 = range(0x100000, 0x10FFFE)
# we use this ranges of PUA_15 to store name aliases and named sequences
NAME_ALIASES_START
=
0xF0000
NAMED_SEQUENCES_START
=
0xF0
1
00
NAMED_SEQUENCES_START
=
0xF0
2
00
old_versions
=
[
"3.2.0"
]
...
...
@@ -95,7 +95,7 @@ EXTENDED_CASE_MASK = 0x4000
# these ranges need to match unicodedata.c:is_unified_ideograph
cjk_ranges
=
[
(
'3400'
,
'4DB5'
),
(
'4E00'
,
'9FC
B
'
),
(
'4E00'
,
'9FC
C
'
),
(
'20000'
,
'2A6D6'
),
(
'2A700'
,
'2B734'
),
(
'2B740'
,
'2B81D'
)
...
...
@@ -958,7 +958,7 @@ class UnicodeData:
s
=
s
.
strip
()
if
not
s
or
s
.
startswith
(
'#'
):
continue
char
,
name
=
s
.
split
(
';'
)
char
,
name
,
abbrev
=
s
.
split
(
';'
)
char
=
int
(
char
,
16
)
self
.
aliases
.
append
((
name
,
char
))
# also store the name in the PUA 1
...
...
@@ -971,6 +971,7 @@ class UnicodeData:
# in order to take advantage of the compression and lookup
# algorithms used for the other characters.
assert
pua_index
<
NAMED_SEQUENCES_START
pua_index
=
NAMED_SEQUENCES_START
with
open_data
(
NAMED_SEQUENCES
,
version
)
as
file
:
for
s
in
file
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment