Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
c5137bbb
Commit
c5137bbb
authored
Sep 25, 2000
by
Fredrik Lundh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
unicode database compression, step 3:
- added decimal digit and digit properties to the unidb tables
parent
09be9894
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
4 deletions
+19
-4
Tools/unicode/makeunicodedata.py
Tools/unicode/makeunicodedata.py
+19
-4
No files found.
Tools/unicode/makeunicodedata.py
View file @
c5137bbb
...
...
@@ -8,6 +8,7 @@
# 2000-09-24 fl created (based on bits and pieces from unidb)
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
# 2000-09-25 fl added character type table
# 2000-09-26 fl added LINEBREAK flags
#
# written by Fredrik Lundh (fredrik@pythonware.com), September 2000
#
...
...
@@ -28,11 +29,12 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF"
,
"EN"
,
"ES"
,
"ET"
,
"AN"
,
"CS"
,
"NSM"
,
"BN"
,
"B"
,
"S"
,
"WS"
,
"ON"
]
# note: should match definitions in Objects/unicodectype.c
ALPHA_MASK = 0x01
DECIMAL_MASK = 0x02
DIGIT_MASK = 0x04
LOWER_MASK = 0x08
NUMERIC_MASK = 0x10
LINEBREAK_MASK = 0x10
SPACE_MASK = 0x20
TITLE_MASK = 0x40
UPPER_MASK = 0x80
...
...
@@ -144,7 +146,7 @@ def maketables():
# 3) unicode type data
# extract unicode types
dummy = (0, 0, 0, 0)
dummy = (0, 0, 0, 0, 0, 0)
table = [dummy]
cache = {0: dummy}
index = [0] * len(unicode.chars)
...
...
@@ -160,6 +162,8 @@ def maketables():
    flags |= ALPHA_MASK
if category == "Ll":
    flags |= LOWER_MASK
if category == "Zl" or bidirectional == "B":
    flags |= LINEBREAK_MASK
if category == "Zs" or bidirectional in ("WS", "B", "S"):
    flags |= SPACE_MASK
if category in ["Lt", "Lu"]:
...
...
@@ -179,8 +183,17 @@ def maketables():
    title = (int(record[14], 16) - char) & 0xffff
else:
    title = 0
# decimal digit, integer digit
decimal = 0
if record[6]:
    flags |= DECIMAL_MASK
    decimal = int(record[6])
digit = 0
if record[7]:
    flags |= DIGIT_MASK
    digit = int(record[7])
item = (
    flags, upper, lower, title
    flags, upper, lower, title, decimal, digit
    )
# add entry to index and item tables
i
=
cache
.
get
(
item
)
...
...
@@ -189,6 +202,8 @@ def maketables():
table
.
append
(
item
)
index
[
char
]
=
i
print
len
(
table
),
"ctype entries"
FILE
=
"Objects/unicodetype_db.h"
sys
.
stdout
=
open
(
FILE
,
"w"
)
...
...
@@ -198,7 +213,7 @@ def maketables():
print "/* a list of unique character type descriptors */"
print "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
for item in table:
    print " {%d, %d, %d, %d}," % item
    print " {%d, %d, %d, %d, %d, %d}," % item
print "};"
print
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment