Commit 70dda76c authored by Amaury Forgeot d'Arc's avatar Amaury Forgeot d'Arc

#1616979: Add the cp720 (Arabic DOS) encoding.

Since there is no official mapping file from unicode.org,
the codec file is generated on Windows with the new genwincodec.py script.
parent f31fd017
This diff is collapsed.
"""This script generates a Python codec module from a Windows Code Page.
It uses the function MultiByteToWideChar to generate a decoding table.
"""
import ctypes
from ctypes import wintypes
from gencodec import codegen
import unicodedata
def genwinmap(codepage):
MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
wintypes.LPCSTR, ctypes.c_int,
wintypes.LPWSTR, ctypes.c_int]
MultiByteToWideChar.restype = ctypes.c_int
enc2uni = {}
for i in range(32) + [127]:
enc2uni[i] = (i, 'CONTROL CHARACTER')
for i in range(256):
buf = ctypes.create_unicode_buffer(2)
ret = MultiByteToWideChar(
codepage, 0,
chr(i), 1,
buf, 2)
assert ret == 1, "invalid code page"
assert buf[1] == '\x00'
try:
name = unicodedata.name(buf[0])
except ValueError:
try:
name = enc2uni[i][1]
except KeyError:
name = ''
enc2uni[i] = (ord(buf[0]), name)
return enc2uni
def genwincodec(codepage):
import platform
map = genwinmap(codepage)
encodingname = 'cp%d' % codepage
code = codegen("", map, encodingname)
# Replace first lines with our own docstring
code = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage
) + code.split('"""#"', 1)[1]
print code
if __name__ == '__main__':
import sys
genwincodec(int(sys.argv[1]))
@rem Recreate some python charmap codecs from the Windows function
@rem MultiByteToWideChar.
@cd /d %~dp0
@mkdir build
@rem Arabic DOS code page
c:\python26\python genwincodec.py 720 > build/cp720.py
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment