Commit 2f695616 authored by Ezio Melotti

Fix ResourceWarnings in makeunicodedata.py.

parent 763ffc85
...@@ -816,15 +816,15 @@ class UnicodeData: ...@@ -816,15 +816,15 @@ class UnicodeData:
expand=1, expand=1,
cjk_check=True): cjk_check=True):
self.changed = [] self.changed = []
file = open_data(UNICODE_DATA, version)
table = [None] * 0x110000 table = [None] * 0x110000
while 1: with open_data(UNICODE_DATA, version) as file:
s = file.readline() while 1:
if not s: s = file.readline()
break if not s:
s = s.strip().split(";") break
char = int(s[0], 16) s = s.strip().split(";")
table[char] = s char = int(s[0], 16)
table[char] = s
cjk_ranges_found = [] cjk_ranges_found = []
...@@ -855,32 +855,34 @@ class UnicodeData: ...@@ -855,32 +855,34 @@ class UnicodeData:
self.table = table self.table = table
self.chars = list(range(0x110000)) # unicode 3.2 self.chars = list(range(0x110000)) # unicode 3.2
file = open_data(COMPOSITION_EXCLUSIONS, version)
self.exclusions = {} self.exclusions = {}
for s in file: with open_data(COMPOSITION_EXCLUSIONS, version) as file:
s = s.strip() for s in file:
if not s: s = s.strip()
continue if not s:
if s[0] == '#': continue
continue if s[0] == '#':
char = int(s.split()[0],16) continue
self.exclusions[char] = 1 char = int(s.split()[0],16)
self.exclusions[char] = 1
widths = [None] * 0x110000 widths = [None] * 0x110000
for s in open_data(EASTASIAN_WIDTH, version): with open_data(EASTASIAN_WIDTH, version) as file:
s = s.strip() for s in file:
if not s: s = s.strip()
continue if not s:
if s[0] == '#': continue
continue if s[0] == '#':
s = s.split()[0].split(';') continue
if '..' in s[0]: s = s.split()[0].split(';')
first, last = [int(c, 16) for c in s[0].split('..')] if '..' in s[0]:
chars = list(range(first, last+1)) first, last = [int(c, 16) for c in s[0].split('..')]
else: chars = list(range(first, last+1))
chars = [int(s[0], 16)] else:
for char in chars: chars = [int(s[0], 16)]
widths[char] = s[1] for char in chars:
widths[char] = s[1]
for i in range(0, 0x110000): for i in range(0, 0x110000):
if table[i] is not None: if table[i] is not None:
table[i].append(widths[i]) table[i].append(widths[i])
...@@ -888,36 +890,39 @@ class UnicodeData: ...@@ -888,36 +890,39 @@ class UnicodeData:
for i in range(0, 0x110000): for i in range(0, 0x110000):
if table[i] is not None: if table[i] is not None:
table[i].append(set()) table[i].append(set())
for s in open_data(DERIVED_CORE_PROPERTIES, version):
s = s.split('#', 1)[0].strip()
if not s:
continue
r, p = s.split(";") with open_data(DERIVED_CORE_PROPERTIES, version) as file:
r = r.strip() for s in file:
p = p.strip() s = s.split('#', 1)[0].strip()
if ".." in r: if not s:
first, last = [int(c, 16) for c in r.split('..')] continue
chars = list(range(first, last+1))
else: r, p = s.split(";")
chars = [int(r, 16)] r = r.strip()
for char in chars: p = p.strip()
if table[char]: if ".." in r:
# Some properties (e.g. Default_Ignorable_Code_Point) first, last = [int(c, 16) for c in r.split('..')]
# apply to unassigned code points; ignore them chars = list(range(first, last+1))
table[char][-1].add(p) else:
chars = [int(r, 16)]
for s in open_data(LINE_BREAK, version): for char in chars:
s = s.partition('#')[0] if table[char]:
s = [i.strip() for i in s.split(';')] # Some properties (e.g. Default_Ignorable_Code_Point)
if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: # apply to unassigned code points; ignore them
continue table[char][-1].add(p)
if '..' not in s[0]:
first = last = int(s[0], 16) with open_data(LINE_BREAK, version) as file:
else: for s in file:
first, last = [int(c, 16) for c in s[0].split('..')] s = s.partition('#')[0]
for char in range(first, last+1): s = [i.strip() for i in s.split(';')]
table[char][-1].add('Line_Break') if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS:
continue
if '..' not in s[0]:
first = last = int(s[0], 16)
else:
first, last = [int(c, 16) for c in s[0].split('..')]
for char in range(first, last+1):
table[char][-1].add('Line_Break')
# We only want the quickcheck properties # We only want the quickcheck properties
# Format: NF?_QC; Y(es)/N(o)/M(aybe) # Format: NF?_QC; Y(es)/N(o)/M(aybe)
...@@ -928,31 +933,33 @@ class UnicodeData: ...@@ -928,31 +933,33 @@ class UnicodeData:
# for older versions, and no delta records will be created. # for older versions, and no delta records will be created.
quickchecks = [0] * 0x110000 quickchecks = [0] * 0x110000
qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
for s in open_data(DERIVEDNORMALIZATION_PROPS, version): with open_data(DERIVEDNORMALIZATION_PROPS, version) as file:
if '#' in s: for s in file:
s = s[:s.index('#')] if '#' in s:
s = [i.strip() for i in s.split(';')] s = s[:s.index('#')]
if len(s) < 2 or s[1] not in qc_order: s = [i.strip() for i in s.split(';')]
continue if len(s) < 2 or s[1] not in qc_order:
quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No continue
quickcheck_shift = qc_order.index(s[1])*2 quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No
quickcheck <<= quickcheck_shift quickcheck_shift = qc_order.index(s[1])*2
if '..' not in s[0]: quickcheck <<= quickcheck_shift
first = last = int(s[0], 16) if '..' not in s[0]:
else: first = last = int(s[0], 16)
first, last = [int(c, 16) for c in s[0].split('..')] else:
for char in range(first, last+1): first, last = [int(c, 16) for c in s[0].split('..')]
assert not (quickchecks[char]>>quickcheck_shift)&3 for char in range(first, last+1):
quickchecks[char] |= quickcheck assert not (quickchecks[char]>>quickcheck_shift)&3
quickchecks[char] |= quickcheck
for i in range(0, 0x110000): for i in range(0, 0x110000):
if table[i] is not None: if table[i] is not None:
table[i].append(quickchecks[i]) table[i].append(quickchecks[i])
zip = zipfile.ZipFile(open_data(UNIHAN, version)) with open_data(UNIHAN, version) as file:
if version == '3.2.0': zip = zipfile.ZipFile(file)
data = zip.open('Unihan-3.2.0.txt').read() if version == '3.2.0':
else: data = zip.open('Unihan-3.2.0.txt').read()
data = zip.open('Unihan_NumericValues.txt').read() else:
data = zip.open('Unihan_NumericValues.txt').read()
for line in data.decode("utf-8").splitlines(): for line in data.decode("utf-8").splitlines():
if not line.startswith('U+'): if not line.startswith('U+'):
continue continue
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment