Commit 2bb146f2 authored by Hye-Shik Chang

Bring CJKCodecs 1.1 into trunk. This completely reorganizes the source
and installed layouts to make maintenance simple and easy. It also adds
four new codecs: big5hkscs, euc-jis-2004, shift-jis-2004 and
iso2022-jp-2004.
parent 264c6591
......@@ -548,6 +548,10 @@ exist:
{big5-tw, csbig5}
{Traditional Chinese}
\lineiii{big5hkscs}
{big5-hkscs, hkscs}
{Traditional Chinese}
\lineiii{cp037}
{IBM037, IBM039}
{English}
......@@ -696,8 +700,12 @@ exist:
{eucjp, ujis, u-jis}
{Japanese}
\lineiii{euc_jis_2004}
{jisx0213, eucjis2004}
{Japanese}
\lineiii{euc_jisx0213}
{jisx0213, eucjisx0213}
{eucjisx0213}
{Japanese}
\lineiii{euc_kr}
......@@ -733,6 +741,10 @@ exist:
{iso2022jp-2, iso-2022-jp-2}
{Japanese, Korean, Simplified Chinese, Western Europe, Greek}
\lineiii{iso2022_jp_2004}
{iso2022jp-2004, iso-2022-jp-2004}
{Japanese}
\lineiii{iso2022_jp_3}
{iso2022jp-3, iso-2022-jp-3}
{Japanese}
......@@ -841,6 +853,10 @@ exist:
{csshiftjis, shiftjis, sjis, s_jis}
{Japanese}
\lineiii{shift_jis_2004}
{shiftjis2004, sjis_2004, sjis2004}
{Japanese}
\lineiii{shift_jisx0213}
{shiftjisx0213, sjisx0213, s_jisx0213}
{Japanese}
......
......@@ -39,6 +39,10 @@ aliases = {
'big5_tw' : 'big5',
'csbig5' : 'big5',
# big5hkscs codec
'big5_hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',
# bz2_codec codec
'bz2' : 'bz2_codec',
......@@ -197,8 +201,12 @@ aliases = {
'950' : 'cp950',
'ms950' : 'cp950',
# euc_jis_2004 codec
'jisx0213' : 'euc_jis_2004',
'eucjis2004' : 'euc_jis_2004',
'euc_jis2004' : 'euc_jis_2004',
# euc_jisx0213 codec
'jisx0213' : 'euc_jisx0213',
'eucjisx0213' : 'euc_jisx0213',
# euc_jp codec
......@@ -254,6 +262,10 @@ aliases = {
'iso2022jp_2' : 'iso2022_jp_2',
'iso_2022_jp_2' : 'iso2022_jp_2',
# iso2022_jp_2004 codec
'iso_2022_jp_2004' : 'iso2022_jp_2004',
'iso2022jp_2004' : 'iso2022_jp_2004',
# iso2022_jp_3 codec
'iso2022jp_3' : 'iso2022_jp_3',
'iso_2022_jp_3' : 'iso2022_jp_3',
......@@ -416,6 +428,11 @@ aliases = {
'sjis' : 'shift_jis',
's_jis' : 'shift_jis',
# shift_jis_2004 codec
'shiftjis2004' : 'shift_jis_2004',
'sjis_2004' : 'shift_jis_2004',
's_jis_2004' : 'shift_jis_2004',
# shift_jisx0213 codec
'shiftjisx0213' : 'shift_jisx0213',
'sjisx0213' : 'shift_jisx0213',
......
......@@ -2,11 +2,12 @@
# big5.py: Python Unicode Codec for BIG5
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: big5.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_big5 import codec
import codecs
import _codecs_tw, codecs
codec = _codecs_tw.getcodec('big5')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# big5hkscs.py: Python Unicode Codec for BIG5HKSCS
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
#
import _codecs_hk, codecs
codec = _codecs_hk.getcodec('big5hkscs')
class Codec(codecs.Codec):
    """BIG5HKSCS codec whose encode/decode are the module-level ``codec``'s."""

    # Both directions are taken straight from the object returned by
    # _codecs_hk.getcodec('big5hkscs').
    decode = codec.decode
    encode = codec.encode
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that forwards its reading API to ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying readers."""
        codecs.StreamReader.__init__(self, stream, errors)
        # Create the underlying reader once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(impl, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that forwards its writing API to ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying writers."""
        codecs.StreamWriter.__init__(self, stream, errors)
        # Create the underlying writer once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(impl, method))
def getregentry():
    """Return the (encode, decode, StreamReader, StreamWriter) tuple."""
    entry = (codec.encode, codec.decode, StreamReader, StreamWriter)
    return entry
......@@ -2,11 +2,12 @@
# cp932.py: Python Unicode Codec for CP932
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp932.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp932 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('cp932')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# cp949.py: Python Unicode Codec for CP949
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp949.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp949 import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('cp949')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# cp950.py: Python Unicode Codec for CP950
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: cp950.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_cp950 import codec
import codecs
import _codecs_tw, codecs
codec = _codecs_tw.getcodec('cp950')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jis_2004')
class Codec(codecs.Codec):
    """EUC_JIS_2004 codec whose encode/decode are the module-level ``codec``'s."""

    # Both directions are taken straight from the object returned by
    # _codecs_jp.getcodec('euc_jis_2004').
    decode = codec.decode
    encode = codec.encode
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that forwards its reading API to ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying readers."""
        codecs.StreamReader.__init__(self, stream, errors)
        # Create the underlying reader once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(impl, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that forwards its writing API to ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying writers."""
        codecs.StreamWriter.__init__(self, stream, errors)
        # Create the underlying writer once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(impl, method))
def getregentry():
    """Return the (encode, decode, StreamReader, StreamWriter) tuple."""
    entry = (codec.encode, codec.decode, StreamReader, StreamWriter)
    return entry
......@@ -2,11 +2,12 @@
# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_jisx0213 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jisx0213')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# euc_jp.py: Python Unicode Codec for EUC_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_jp import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('euc_jp')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# euc_kr.py: Python Unicode Codec for EUC_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: euc_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_euc_kr import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('euc_kr')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# gb18030.py: Python Unicode Codec for GB18030
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gb18030.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gb18030 import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gb18030')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# gb2312.py: Python Unicode Codec for GB2312
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gb2312.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gb2312 import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gb2312')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# gbk.py: Python Unicode Codec for GBK
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: gbk.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_gbk import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('gbk')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# hz.py: Python Unicode Codec for HZ
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: hz.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_hz import codec
import codecs
import _codecs_cn, codecs
codec = _codecs_cn.getcodec('hz')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_jp.py: Python Unicode Codec for ISO_2022_JP
# iso2022_jp.py: Python Unicode Codec for ISO2022_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_jp_1.py: Python Unicode Codec for ISO_2022_JP_1
# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_1.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_1 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_1')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_jp_2.py: Python Unicode Codec for ISO_2022_JP_2
# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_2.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_2 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_2')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
class Codec(codecs.Codec):
    """ISO2022_JP_2004 codec whose encode/decode are the module-level ``codec``'s."""

    # Both directions are taken straight from the object returned by
    # _codecs_iso2022.getcodec('iso2022_jp_2004').
    decode = codec.decode
    encode = codec.encode
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that forwards its reading API to ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying readers."""
        codecs.StreamReader.__init__(self, stream, errors)
        # Create the underlying reader once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(impl, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that forwards its writing API to ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying writers."""
        codecs.StreamWriter.__init__(self, stream, errors)
        # Create the underlying writer once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(impl, method))
def getregentry():
    """Return the (encode, decode, StreamReader, StreamWriter) tuple."""
    entry = (codec.encode, codec.decode, StreamReader, StreamWriter)
    return entry
#
# iso2022_jp_3.py: Python Unicode Codec for ISO_2022_JP_3
# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_3.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_3 import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_3')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_jp_ext.py: Python Unicode Codec for ISO_2022_JP_EXT
# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_jp_ext.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_jp_ext import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# iso2022_kr.py: Python Unicode Codec for ISO_2022_KR
# iso2022_kr.py: Python Unicode Codec for ISO2022_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: iso2022_kr.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
#
from _codecs_iso2022_kr import codec
import codecs
import _codecs_iso2022, codecs
codec = _codecs_iso2022.getcodec('iso2022_kr')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# johab.py: Python Unicode Codec for JOHAB
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: johab.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_johab import codec
import codecs
import _codecs_kr, codecs
codec = _codecs_kr.getcodec('johab')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -2,11 +2,12 @@
# shift_jis.py: Python Unicode Codec for SHIFT_JIS
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jis.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_shift_jis import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jis')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
#
# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
#
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jis_2004')
class Codec(codecs.Codec):
    """SHIFT_JIS_2004 codec whose encode/decode are the module-level ``codec``'s."""

    # Both directions are taken straight from the object returned by
    # _codecs_jp.getcodec('shift_jis_2004').
    decode = codec.decode
    encode = codec.encode
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that forwards its reading API to ``codec.StreamReader``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying readers."""
        codecs.StreamReader.__init__(self, stream, errors)
        # Create the underlying reader once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamReader(stream, errors)
        for method in ('read', 'readline', 'readlines', 'reset'):
            setattr(self, method, getattr(impl, method))
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that forwards its writing API to ``codec.StreamWriter``."""

    def __init__(self, stream, errors='strict'):
        """Wrap *stream*; *errors* is handed to both underlying writers."""
        codecs.StreamWriter.__init__(self, stream, errors)
        # Create the underlying writer once and expose its bound methods as
        # instance attributes, which shadow the inherited implementations.
        impl = codec.StreamWriter(stream, errors)
        for method in ('write', 'writelines', 'reset'):
            setattr(self, method, getattr(impl, method))
def getregentry():
    """Return the (encode, decode, StreamReader, StreamWriter) tuple."""
    entry = (codec.encode, codec.decode, StreamReader, StreamWriter)
    return entry
......@@ -2,11 +2,12 @@
# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
# $CJKCodecs: shift_jisx0213.py,v 1.3 2004/01/17 11:26:10 perky Exp $
# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
#
from _codecs_shift_jisx0213 import codec
import codecs
import _codecs_jp, codecs
codec = _codecs_jp.getcodec('shift_jisx0213')
class Codec(codecs.Codec):
encode = codec.encode
......@@ -30,4 +31,4 @@ class StreamWriter(Codec, codecs.StreamWriter):
self.reset = __codec.reset
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
return (codec.encode, codec.decode, StreamReader, StreamWriter)
......@@ -63,6 +63,9 @@ teststring = {
"\x88\x91\xe5\x80\x91\xe6\x89\x80\x0a\xe8\xa6\x81\xe8\xa8\x8e\xe8"
"\xab\x96\xe7\x9a\x84\xe5\x95\x8f\xe9\xa1\x8c\xe5\xb0\xb1\xe6\x98"
"\xaf\x3a\x0a\x0a"),
'big5hkscs': (
"\x88\x45\x88\x5c\x8a\x73\x8b\xda\x8d\xd8\x0a",
"\xf0\xa0\x84\x8c\xc4\x9a\xe9\xb5\xae\xe7\xbd\x93\xe6\xb4\x86\x0a"),
'cp949': (
"\x8c\x63\xb9\xe6\xb0\xa2\xc7\xcf\x20\xbc\x84\xbd\xc3\xc4\xdd\xb6"
"\xf3\x0a\x0a\xa8\xc0\xa8\xc0\xb3\xb3\x21\x21\x20\xec\xd7\xce\xfa"
......
......@@ -3,7 +3,7 @@
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
# $CJKCodecs: test_codecencodings_cn.py,v 1.1 2003/12/19 03:00:05 perky Exp $
# $CJKCodecs: test_codecencodings_cn.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
#!/usr/bin/env python
#
# test_codecencodings_hk.py
# Codec encoding tests for HongKong encodings.
#
# $CJKCodecs: test_codecencodings_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Encoding test case for the ``big5hkscs`` codec."""

    encoding = 'big5hkscs'
    tstring = test_multibytecodec_support.load_teststring('big5hkscs')

    # Each entry is (input, error handler, expected result).
    # NOTE(review): an expected value of None presumably means the input must
    # be rejected -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
    )
def test_main():
    """Build a suite from Test_Big5HKSCS and run it via test_support."""
    suite = unittest.TestSuite()
    hkscs_tests = unittest.makeSuite(Test_Big5HKSCS)
    suite.addTest(hkscs_tests)
    test_support.run_suite(suite)
if __name__ == "__main__":
test_main()
......@@ -3,7 +3,7 @@
# test_codecencodings_jp.py
# Codec encoding tests for Japanese encodings.
#
# $CJKCodecs: test_codecencodings_jp.py,v 1.2 2004/01/06 09:25:37 perky Exp $
# $CJKCodecs: test_codecencodings_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......@@ -64,16 +64,6 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
(u"\u203e", "strict", "\x7e"),
)
class Test_EUC_JP_STRICT(test_multibytecodec_support.TestBase,
unittest.TestCase):
encoding = 'euc_jp_strict'
tstring = test_multibytecodec_support.load_teststring('euc_jp')
codectests = eucjp_commontests + (
("\xa1\xc0\\", "strict", u"\\\\"),
(u"\xa5", "strict", None),
(u"\u203e", "strict", None),
)
shiftjis_commonenctests = (
("abc\x80\x80\x82\x84", "strict", None),
("abc\xf8", "strict", None),
......@@ -90,14 +80,6 @@ class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
)
class Test_SJIS_STRICT(test_multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jis_strict'
tstring = test_multibytecodec_support.load_teststring('shift_jis')
codectests = shiftjis_commonenctests + (
("\\\x7e", "replace", u"\xa5\u203e"),
("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
)
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jisx0213'
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
......@@ -123,9 +105,6 @@ def test_main():
suite.addTest(unittest.makeSuite(Test_EUC_JISX0213))
suite.addTest(unittest.makeSuite(Test_EUC_JP_COMPAT))
suite.addTest(unittest.makeSuite(Test_SJIS_COMPAT))
if test_multibytecodec_support.__cjkcodecs__:
suite.addTest(unittest.makeSuite(Test_EUC_JP_STRICT))
suite.addTest(unittest.makeSuite(Test_SJIS_STRICT))
suite.addTest(unittest.makeSuite(Test_SJISX0213))
test_support.run_suite(suite)
......
......@@ -3,7 +3,7 @@
# test_codecencodings_kr.py
# Codec encoding tests for ROK encodings.
#
# $CJKCodecs: test_codecencodings_kr.py,v 1.1 2003/12/19 03:00:06 perky Exp $
# $CJKCodecs: test_codecencodings_kr.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
......@@ -3,7 +3,7 @@
# test_codecencodings_tw.py
# Codec encoding tests for ROC encodings.
#
# $CJKCodecs: test_codecencodings_tw.py,v 1.1 2003/12/19 03:00:06 perky Exp $
# $CJKCodecs: test_codecencodings_tw.py,v 1.2 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
......@@ -3,7 +3,7 @@
# test_codecmaps_cn.py
# Codec mapping tests for PRC encodings
#
# $CJKCodecs: test_codecmaps_cn.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_cn.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
#!/usr/bin/env python
#
# test_codecmaps_hk.py
# Codec mapping tests for HongKong encodings
#
# $CJKCodecs: test_codecmaps_hk.py,v 1.1 2004/07/10 17:35:20 perky Exp $
from test import test_support
from test import test_multibytecodec_support
import unittest
class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping,
                       unittest.TestCase):
    """Mapping-file test case for the ``big5hkscs`` codec."""

    encoding = 'big5hkscs'
    # Name of the mapping table file and the URL it can be downloaded from.
    mapfilename = 'BIG5HKSCS.TXT'
    mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS.TXT'
def test_main():
    """Build a suite from TestBig5HKSCSMap and run it via test_support."""
    suite = unittest.TestSuite()
    mapping_tests = unittest.makeSuite(TestBig5HKSCSMap)
    suite.addTest(mapping_tests)
    test_support.run_suite(suite)
test_multibytecodec_support.register_skip_expected(TestBig5HKSCSMap)
if __name__ == "__main__":
test_main()
......@@ -3,7 +3,7 @@
# test_codecmaps_jp.py
# Codec mapping tests for Japanese encodings
#
# $CJKCodecs: test_codecmaps_jp.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_jp.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......@@ -48,15 +48,6 @@ class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
('\x81_', u'\\'),
]
class TestSJISSTRICTMap(test_multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'shift_jis_strict'
mapfilename = 'SHIFTJIS.TXT'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
'/EASTASIA/JIS/SHIFTJIS.TXT'
class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'euc_jisx0213'
......@@ -76,8 +67,6 @@ def test_main():
suite.addTest(unittest.makeSuite(TestCP932Map))
suite.addTest(unittest.makeSuite(TestEUCJPCOMPATMap))
suite.addTest(unittest.makeSuite(TestSJISCOMPATMap))
if test_multibytecodec_support.__cjkcodecs__:
suite.addTest(unittest.makeSuite(TestSJISSTRICTMap))
suite.addTest(unittest.makeSuite(TestEUCJISX0213Map))
suite.addTest(unittest.makeSuite(TestSJISX0213Map))
test_support.run_suite(suite)
......@@ -85,5 +74,6 @@ def test_main():
test_multibytecodec_support.register_skip_expected(TestCP932Map,
TestEUCJPCOMPATMap, TestSJISCOMPATMap, TestEUCJISX0213Map,
TestSJISX0213Map)
if __name__ == "__main__":
test_main()
......@@ -3,7 +3,7 @@
# test_codecmaps_kr.py
# Codec mapping tests for ROK encodings
#
# $CJKCodecs: test_codecmaps_kr.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_kr.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
......@@ -3,7 +3,7 @@
# test_codecmaps_tw.py
# Codec mapping tests for ROC encodings
#
# $CJKCodecs: test_codecmaps_tw.py,v 1.2 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_codecmaps_tw.py,v 1.3 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......
......@@ -3,7 +3,7 @@
# test_multibytecodec.py
# Unit test for multibytecodec itself
#
# $CJKCodecs: test_multibytecodec.py,v 1.5 2004/01/06 02:26:28 perky Exp $
# $CJKCodecs: test_multibytecodec.py,v 1.8 2004/06/19 06:09:55 perky Exp $
from test import test_support
from test import test_multibytecodec_support
......@@ -69,6 +69,12 @@ class Test_StreamWriter(unittest.TestCase):
def test_str_decode(self):
self.assertEqual('abcd'.encode('gb18030'), 'abcd')
def test_streamwriter_strwrite(self):
s = StringIO.StringIO()
wr = codecs.getwriter('gb18030')(s)
wr.write('abcd')
self.assertEqual(s.getvalue(), 'abcd')
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test_StreamWriter))
......
......@@ -3,7 +3,7 @@
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
# $CJKCodecs: test_multibytecodec_support.py,v 1.5 2004/01/17 12:47:19 perky Exp $
# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
import sys, codecs, os.path
import unittest
......@@ -164,13 +164,8 @@ class TestBase_Mapping(unittest.TestCase):
def __init__(self, *args, **kw):
unittest.TestCase.__init__(self, *args, **kw)
if not os.path.exists(self.mapfilename):
parent = os.path.join(os.pardir, self.mapfilename)
if not os.path.exists(parent):
format = '%s not found, download from %s'
raise test_support.TestSkipped(format %
raise test_support.TestSkipped('%s not found, download from %s' %
(self.mapfilename, self.mapfileurl))
else:
self.mapfilename = parent
def test_mapping_file(self):
unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
......
......@@ -34,6 +34,9 @@ Extension modules
Library
-------
- Several new unicode codecs are added: big5hkscs, euc_jis_2004,
iso2022_jp_2004, shift_jis_2004.
- Bug #788520. Queue.{get, get_nowait, put, put_nowait} have new
implementations, exploiting Conditions (which didn't exist at the time
Queue was introduced). A minor semantic change is that the Full and
......
......@@ -484,42 +484,12 @@ GLHACK=-Dclear=__GLclear
# multibytecodec is required for all the other CJK codec modules
#_multibytecodec cjkcodecs/multibytecodec.c
# mapdata modules are required to support their respective dependent codecs
#_codecs_mapdata_ja_JP cjkcodecs/mapdata_ja_JP.c
#_codecs_mapdata_ko_KR cjkcodecs/mapdata_ko_KR.c
#_codecs_mapdata_zh_CN cjkcodecs/mapdata_zh_CN.c
#_codecs_mapdata_zh_TW cjkcodecs/mapdata_zh_TW.c
# ja_JP codecs
#_codecs_cp932 cjkcodecs/_cp932.c
#_codecs_euc_jisx0213 cjkcodecs/_euc_jisx0213.c
#_codecs_euc_jp cjkcodecs/_euc_jp.c
#_codecs_iso2022_jp cjkcodecs/_iso2022_jp.c
#_codecs_iso2022_jp_1 cjkcodecs/_iso2022_jp_1.c
#_codecs_iso2022_jp_3 cjkcodecs/_iso2022_jp_3.c
#_codecs_iso2022_jp_ext cjkcodecs/_iso2022_jp_ext.c
#_codecs_shift_jis cjkcodecs/_shift_jis.c
#_codecs_shift_jisx0213 cjkcodecs/_shift_jisx0213.c
# ko_KR codecs
#_codecs_cp949 cjkcodecs/_cp949.c
#_codecs_euc_kr cjkcodecs/_euc_kr.c
#_codecs_iso2022_kr cjkcodecs/_iso2022_kr.c
#_codecs_johab cjkcodecs/_johab.c
# zh_CN codecs
#_codecs_gb18030 cjkcodecs/_gb18030.c
#_codecs_gb2312 cjkcodecs/_gb2312.c
#_codecs_gbk cjkcodecs/_gbk.c
#_codecs_hz cjkcodecs/_hz.c
# zh_TW codecs
#_codecs_big5 cjkcodecs/_big5.c
#_codecs_cp950 cjkcodecs/_cp950.c
# international codecs
#_codecs_iso2022_jp_2 cjkcodecs/_iso2022_jp_2.c # requires ja_JP, ko_KR, zh_CN
#_codecs_cn cjkcodecs/_codecs_cn.c
#_codecs_hk cjkcodecs/_codecs_hk.c
#_codecs_iso2022 cjkcodecs/_codecs_iso2022.c
#_codecs_jp cjkcodecs/_codecs_jp.c
#_codecs_kr cjkcodecs/_codecs_kr.c
#_codecs_tw cjkcodecs/_codecs_tw.c
# Example -- included for reference only:
# xx xxmodule.c
......
......@@ -2,7 +2,7 @@ Notes on cjkcodecs
-------------------
This directory contains source files for cjkcodecs extension modules.
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
as of Jan 17 2004 currently.
as of Jul 18 2004 currently.
......
/*
* _big5.c: the Big5 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _big5.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(big5)
DECMAP(big5)
/*
 * Big5 encoder.
 *
 * Walks the Py_UNICODE input while inleft > 0.  Characters below 0x80 are
 * written unchanged as one byte; any other character is looked up with
 * TRYMAP_ENC(big5, ...) and written as two bytes, high byte first.
 *
 * Returns 0 when the loop finishes, or 1 when the big5 lookup fails.
 * NOTE(review): the meaning of the non-zero return value and of the
 * RESERVE_OUTBUF / UCS4INVALID / NEXT macros is defined in codeccommon.h,
 * which is not visible here -- confirm there.
 */
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
/* ASCII range: one input unit becomes one output byte. */
if (c < 0x80) {
RESERVE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
/* TRYMAP_ENC opens an if-statement; the else below is its failure branch. */
TRYMAP_ENC(big5, code, c);
else return 1;
/* Emit the double-byte code, high byte first. */
(*outbuf)[0] = code >> 8;
(*outbuf)[1] = code & 0xFF;
NEXT(1, 2)
}
return 0;
}
/*
 * Big5 decoder.
 *
 * Walks the byte input while inleft > 0.  Bytes below 0x80 are emitted
 * unchanged; otherwise the current byte and the following one (IN2) are
 * looked up together with TRYMAP_DEC(big5, ...) and the result is stored
 * in **outbuf.
 *
 * Returns 0 when the loop finishes, or 2 when the two-byte lookup fails.
 * NOTE(review): the meaning of the non-zero return value and of the
 * RESERVE_* / IN* / OUT1 / NEXT macros is defined in codeccommon.h, which
 * is not visible here -- confirm there.
 */
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
/* ASCII range: one input byte becomes one output character. */
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
/* Two input bytes map to one output character on success. */
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the big5 codec: between MAPOPEN(zh_TW) and
 * MAPCLOSE() it imports the big5 encode/decode maps.
 * NOTE(review): the exact expansion of these macros comes from
 * codecentry.h, which is not visible here.
 */
BEGIN_CODEC_REGISTRY(big5)
MAPOPEN(zh_TW)
IMPORTMAP_ENCDEC(big5)
MAPCLOSE()
END_CODEC_REGISTRY(big5)
/*
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_cn.c,v 1.8 2004/07/07 14:59:26 perky Exp $
*/
#include "cjkcodecs.h"
#include "mappings_cn.h"
/*
 * GBK_PREDECODE(dc1, dc2, assi): special-case three byte pairs before
 * the table lookup.  Assigns to `assi`:
 *   A1 AA -> U+2014,  A8 44 -> U+2015,  A1 A4 -> U+00B7.
 * The expansion is an if / else-if chain ending in ';', so callers
 * continue it with `else ...` for the general case.
 */
#define GBK_PREDECODE(dc1, dc2, assi) \
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7;
/*
 * GBK_PREENCODE(code, assi): inverse of GBK_PREDECODE.  Assigns to
 * `assi` the two-byte value for three code points:
 *   U+2014 -> 0xA1AA,  U+2015 -> 0xA844,  U+00B7 -> 0xA1A4.
 * The expansion is an if / else-if chain ending in ';', so callers
 * continue it with `else ...` for the general case.
 */
#define GBK_PREENCODE(code, assi) \
if ((code) == 0x2014) (assi) = 0xa1aa; \
else if ((code) == 0x2015) (assi) = 0xa844; \
else if ((code) == 0x00b7) (assi) = 0xa1a4;
/*
* GB2312 codec
*/
/*
 * GB2312 encoder.
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in the shared gbcommon encode map; entries whose top bit (0x8000)
 * is set are GBK-only and rejected.  Accepted codes are written as two
 * bytes, each OR-ed with 0x80.
 *
 * Returns 0 when the input is exhausted, 1 when the character is not
 * in the map or is GBK-only.  REQUIRE_OUTBUF / UCS4INVALID / NEXT are
 * framework macros defined elsewhere and may return early.
 */
ENCODER(gb2312)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * GB2312 decoder.
 *
 * Bytes below 0x80 pass through unchanged.  Otherwise the current and
 * next byte are XOR-ed with 0x80 and looked up in the gb2312 decode
 * map; the result is stored directly in **outbuf.
 *
 * Returns 0 when the input is exhausted, 2 when the pair is not in the
 * map.  REQUIRE_* macros are defined elsewhere and may return early.
 */
DECODER(gb2312)
{
while (inleft > 0) {
unsigned char c = **inbuf;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* GBK codec
*/
/*
 * GBK encoder.
 *
 * Code points below 0x80 are written as one byte.  For the rest,
 * GBK_PREENCODE handles three special code points first, then the
 * gbcommon encode map is consulted.  The first output byte is always
 * (code >> 8) | 0x80; the second byte is written as-is when bit 0x8000
 * of the code is set (GBK entry) and OR-ed with 0x80 otherwise
 * (GB2312 entry).
 *
 * Returns 0 when the input is exhausted, 1 when no mapping exists.
 */
ENCODER(gbk)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
/* The macro chain below forms a single if / else-if / else statement. */
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GBK decoder.
 *
 * Bytes below 0x80 pass through unchanged.  A two-byte sequence is
 * tried, in order, against: the GBK_PREDECODE special cases, the
 * gb2312 map (both bytes XOR 0x80), and the gbkext map (raw bytes).
 *
 * Returns 0 when the input is exhausted, 2 when none of them match.
 */
DECODER(gbk)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* The macro chain below forms a single if / else-if / else statement. */
GBK_PREDECODE(c, IN2, **outbuf)
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* GB18030 codec
*/
/*
 * GB18030 encoder.
 *
 * Per input character:
 *   - c < 0x80: one byte.
 *   - c >= 0x10000 (after DECODE_SURROGATE): four bytes derived from
 *     c - 0x10000 by successive % 10, % 126, % 10 steps.
 *   - otherwise: two bytes via GBK_PREENCODE, gbcommon or gb18030ext;
 *     failing those, four bytes computed from the
 *     gb18030_to_unibmp_ranges table.
 *
 * Returns 0 when the input is exhausted.  Returns 1 or 2 for
 * c > 0x10FFFF (2 when Py_UNICODE_SIZE == 2), and 1 with a
 * RuntimeError set when no range in the table covers c.
 */
ENCODER(gb18030)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
/* Supplementary planes: fill the four bytes from last to first. */
ucs4_t tc = c - 0x10000;
REQUIRE_OUTBUF(4)
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue;
}
REQUIRE_OUTBUF(2)
GBK_PREENCODE(c, code)
else TRYMAP_ENC(gbcommon, code, c);
else TRYMAP_ENC(gb18030ext, code, c);
else {
/* No two-byte mapping: search the range table for a four-byte form. */
const struct _gb18030_to_unibmp_ranges *utrrange;
REQUIRE_OUTBUF(4)
/* The table is terminated by an entry whose `first` is 0. */
for (utrrange = gb18030_to_unibmp_ranges;
utrrange->first != 0;
utrrange++)
if (utrrange->first <= c &&
c <= utrrange->last) {
Py_UNICODE tc;
tc = c - utrrange->first +
utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81)
tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30)
tc /= 10;
OUT1((unsigned char)tc + 0x81)
NEXT(1, 4)
break;
}
/* Reaching the terminator means no range covered c. */
if (utrrange->first == 0) {
PyErr_SetString(PyExc_RuntimeError,
"unicode mapping invalid");
return 1;
}
continue;
}
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2)
}
return 0;
}
/*
 * GB18030 decoder.
 *
 * Bytes below 0x80 pass through.  If the second byte is in
 * 0x30..0x39 the input is treated as a four-byte sequence; otherwise
 * it is a two-byte sequence tried against GBK_PREDECODE, gb2312,
 * gbkext and gb18030ext in that order.
 *
 * Returns 0 when the input is exhausted, 4 for an invalid four-byte
 * sequence, 2 for an unmapped two-byte sequence.
 */
DECODER(gb18030)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
c2 = IN2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4;
ucs4_t lseq;
REQUIRE_INBUF(4)
c3 = IN3;
c4 = IN4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
return 4;
/* Rebase each byte to zero before computing the linear index. */
c -= 0x81; c2 -= 0x30;
c3 -= 0x81; c4 -= 0x30;
if (c < 4) { /* U+0080 - U+FFFF */
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
(ucs4_t)c3 * 10 + c4;
if (lseq < 39420) {
/* Advance to the last range whose base is <= lseq. */
for (utr = gb18030_to_unibmp_ranges;
lseq >= (utr + 1)->base;
utr++) ;
OUT1(utr->first - utr->base + lseq)
NEXT(4, 1)
continue;
}
}
else if (c >= 15) { /* U+10000 - U+10FFFF */
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
* 1260 + (ucs4_t)c3 * 10 + c4;
if (lseq <= 0x10FFFF) {
WRITEUCS4(lseq);
NEXT_IN(4)
continue;
}
}
/* Any four-byte sequence not handled above is rejected. */
return 4;
}
GBK_PREDECODE(c, c2, **outbuf)
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
else TRYMAP_DEC(gbkext, **outbuf, c, c2);
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* HZ codec
*/
/*
 * HZ encoder initialisation: clears state->i and returns 0.
 * In the HZ encoder, state->i == 0 is the state in which ASCII is
 * written without an escape sequence.
 */
ENCODER_INIT(hz)
{
state->i = 0;
return 0;
}
/*
 * HZ encoder reset: if state->i is non-zero, writes the two-byte
 * sequence "~}" and clears state->i.  Always returns 0 unless WRITE2
 * returns early (macro defined elsewhere).
 */
ENCODER_RESET(hz)
{
if (state->i != 0) {
WRITE2('~', '}')
state->i = 0;
NEXT_OUT(2)
}
return 0;
}
/*
 * HZ encoder.
 *
 * state->i tracks the current mode: 0 = ASCII, 1 = GB (set after "~{"
 * has been written).  Code points below 0x80 are written directly in
 * ASCII mode, or preceded by "~}" when leaving GB mode.  Other code
 * points are looked up in gbcommon; GBK-only entries (bit 0x8000 set)
 * are rejected.  GB codes are written as two raw bytes, preceded by
 * "~{" when entering GB mode.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable
 * character.
 */
ENCODER(hz)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
WRITE1((unsigned char)c)
NEXT(1, 1)
}
else {
/* Leave GB mode before emitting the ASCII character. */
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
state->i = 0;
}
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(gbcommon, code, c);
else return 1;
if (code & 0x8000) /* MSB set: GBK */
return 1;
if (state->i == 0) {
/* Enter GB mode and emit the character in one write. */
WRITE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4)
state->i = 1;
}
else {
WRITE2(code >> 8, code & 0xff)
NEXT(1, 2)
}
}
return 0;
}
/*
 * HZ decoder initialisation: clears state->i (0 is the ASCII mode used
 * by the HZ decoder) and returns 0.
 */
DECODER_INIT(hz)
{
state->i = 0;
return 0;
}
/*
 * HZ decoder reset: clears state->i (back to ASCII mode) and
 * returns 0.  No output is produced.
 */
DECODER_RESET(hz)
{
state->i = 0;
return 0;
}
DECODER(hz)
{
while (inleft > 0) {
unsigned char c = IN1;
if (c == '~') {
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
if (c2 == '~') {
WRITE1('~')
NEXT(2, 1)
continue;
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
continue;
}
if (c & 0x80)
return 1;
if (state->i == 0) { /* ASCII mode */
WRITE1(c)
NEXT(1, 1)
}
else { /* GB mode */
REQUIRE_INBUF(2)
REQUIRE_OUTBUF(1)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
return 2;
}
}
return 0;
}
/*
 * Module tables for the "cn" codec collection: the mapping tables it
 * lists (gb2312 and gbkext decode-only, gbcommon encode-only,
 * gb18030ext both ways) and the codecs it provides (gb2312, gbk and
 * gb18030 stateless; hz stateful).
 * NOTE(review): the macros are defined in cjkcodecs.h, not visible here.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(gb2312)
MAPPING_DECONLY(gbkext)
MAPPING_ENCONLY(gbcommon)
MAPPING_ENCDEC(gb18030ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(gb2312)
CODEC_STATELESS(gbk)
CODEC_STATELESS(gb18030)
CODEC_STATEFUL(hz)
END_CODECS_LIST
I_AM_A_MODULE_FOR(cn)
/*
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_hk.c,v 1.3 2004/07/07 14:59:26 perky Exp $
*/
#define USING_IMPORTED_MAPS
#include "cjkcodecs.h"
#include "mappings_hk.h"
/*
* BIG5HKSCS codec
*/
/* Big5 maps filled in by IMPORT_MAP in CODEC_INIT; NULL until then. */
static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
/*
 * One-time initialisation for big5hkscs: imports the big5 encode and
 * decode maps from the "tw" collection on the first call.
 * Returns -1 if IMPORT_MAP reports failure (non-zero), 0 otherwise.
 * A failed import leaves `initialized` at 0, so the next call retries.
 */
CODEC_INIT(big5hkscs)
{
static int initialized = 0;
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
return -1;
initialized = 1;
return 0;
}
/*
 * Big5-HKSCS encoder.
 *
 * Code points below 0x80 are written as one byte.  BMP characters are
 * looked up first in big5hkscs_bmp, then in the imported big5 map.
 * Characters in 0x20000..0x2FFFF are looked up in big5hkscs_nonbmp by
 * their low 16 bits.  Every other range is rejected.  Mapped codes are
 * written as two bytes, high byte first.
 *
 * Returns 0 when the input is exhausted, 1 for an unmapped BMP
 * character, and `insize` (from GET_INSIZE) for a rejected non-BMP
 * character.
 */
ENCODER(big5hkscs)
{
while (inleft > 0) {
ucs4_t c = **inbuf;
DBCHAR code;
int insize;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
REQUIRE_OUTBUF(2)
if (c < 0x10000) {
/* HKSCS additions take precedence over plain Big5. */
TRYMAP_ENC(big5hkscs_bmp, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
}
else if (c < 0x20000)
return insize;
else if (c < 0x30000) {
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
else return insize;
}
else
return insize;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(insize, 2)
}
return 0;
}
/*
 * BH2S(c1, c2): linear index of the byte pair (c1, c2), counting rows
 * from lead byte 0x88 and columns from trail byte 0x40, with
 * 0xfe - 0x40 + 1 columns per row.
 */
#define BH2S(c1, c2) (((c1) - 0x88) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
/*
 * Big5-HKSCS decoder.
 *
 * Bytes below 0x80 pass through.  For a two-byte sequence the plain
 * big5 map is tried first, except for lead bytes 0xc7..0xc8 and
 * 0xc6 with trail >= 0xa1, which go straight to the HKSCS map.  For
 * an HKSCS hit, one bit in a hint bitmap (selected by the BH2S index
 * of the pair) decides whether the 16-bit result is OR-ed with
 * 0x20000 and written via WRITEUCS4, or written as-is.
 *
 * Returns 0 when the input is exhausted, 2 for an unmapped pair, and
 * MBERR_INTERNAL when an HKSCS hit falls outside all three hint
 * ranges.
 */
DECODER(big5hkscs)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t decoded;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
/* This region bypasses the big5 map and uses the HKSCS map only. */
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
goto hkscsdec;
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x88 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
/* Pick the hint bitmap for this region and rebase s to its start. */
if (BH2S(0x88, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x88, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_11939;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21733;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
/* Bit s of the bitmap set: the character lives at 0x20000 | decoded. */
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
}
else return 2;
}
return 0;
}
/*
 * Module tables for the "hk" codec collection: big5hkscs decode map,
 * the two big5hkscs encode maps (BMP and non-BMP), and the single
 * big5hkscs codec, registered with the variant that takes an init
 * function (CODEC_STATELESS_WINIT).
 * NOTE(review): the macros are defined in cjkcodecs.h, not visible here.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(big5hkscs)
MAPPING_ENCONLY(big5hkscs_bmp)
MAPPING_ENCONLY(big5hkscs_nonbmp)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS_WINIT(big5hkscs)
END_CODECS_LIST
I_AM_A_MODULE_FOR(hk)
This diff is collapsed.
This diff is collapsed.
/*
* _johab.c: the Johab codec
* _codecs_kr.c: Codecs collection for Korean encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _johab.c,v 1.3 2003/12/31 05:46:55 perky Exp $
* $CJKCodecs: _codecs_kr.c,v 1.8 2004/07/07 14:59:26 perky Exp $
*/
#include "codeccommon.h"
#include "cjkcodecs.h"
#include "mappings_kr.h"
ENCMAP(cp949)
DECMAP(ksx1001)
/*
* EUC-KR codec
*/
/*
 * EUC-KR encoder.
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in the cp949 encode map; entries with bit 0x8000 set are
 * CP949-only and rejected.  Accepted codes are written as two bytes,
 * each OR-ed with 0x80.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable
 * character.
 */
ENCODER(euc_kr)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
if (code & 0x8000) /* MSB set: CP949 */
return 1;
OUT1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80)
NEXT(1, 2)
}
return 0;
}
/*
 * EUC-KR decoder.
 *
 * Bytes below 0x80 pass through.  Otherwise the current and next byte
 * are XOR-ed with 0x80 and looked up in the ksx1001 decode map.
 *
 * Returns 0 when the input is exhausted, 2 for an unmapped pair.
 */
DECODER(euc_kr)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
NEXT(2, 1)
} else return 2;
}
return 0;
}
/*
* CP949 codec
*/
/*
 * CP949 encoder.
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in the cp949 encode map.  The first output byte is
 * (code >> 8) | 0x80; the second is written as-is when bit 0x8000 of
 * the code is set (CP949 extension) and OR-ed with 0x80 otherwise
 * (KS X 1001).
 *
 * Returns 0 when the input is exhausted, 1 when no mapping exists.
 */
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
/*
 * CP949 decoder.
 *
 * Bytes below 0x80 pass through.  A two-byte sequence is tried first
 * against ksx1001 (both bytes XOR 0x80), then against cp949ext (raw
 * bytes).
 *
 * Returns 0 when the input is exhausted, 2 when neither map matches.
 */
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
* JOHAB codec
*/
static const unsigned char u2johabidx_choseong[32] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
......@@ -50,7 +165,7 @@ ENCODER(johab)
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
REQUIRE_OUTBUF(2)
if (c >= 0xac00 && c <= 0xd7a3) {
c -= 0xac00;
......@@ -58,7 +173,8 @@ ENCODER(johab)
(u2johabidx_choseong[c / 588] << 10) |
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
u2johabidx_jongseong[c % 28];
} else if (c >= 0x3131 && c <= 0x3163)
}
else if (c >= 0x3131 && c <= 0x3163)
code = u2johabjamo[c - 0x3131];
else TRYMAP_ENC(cp949, code, c) {
unsigned char c1, c2, t2;
......@@ -67,17 +183,21 @@ ENCODER(johab)
assert((code & 0x8000) == 0);
c1 = code >> 8;
c2 = code & 0xff;
if (((c1 >= 0x21 && c1 <= 0x2c) || (c1 >= 0x4a && c1 <= 0x7d))
&& (c2 >= 0x21 && c2 <= 0x7e)) {
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197));
if (((c1 >= 0x21 && c1 <= 0x2c) ||
(c1 >= 0x4a && c1 <= 0x7d)) &&
(c2 >= 0x21 && c2 <= 0x7e)) {
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2)
continue;
} else
}
else
return 1;
} else
}
else
return 1;
OUT1(code >> 8)
......@@ -134,7 +254,7 @@ DECODER(johab)
while (inleft > 0) {
unsigned char c = IN1, c2;
RESERVE_OUTBUF(1)
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
......@@ -142,7 +262,7 @@ DECODER(johab)
continue;
}
RESERVE_INBUF(2)
REQUIRE_INBUF(2)
c2 = IN2;
if (c < 0xd8) {
......@@ -167,20 +287,25 @@ DECODER(johab)
if (i_jong == FILL)
OUT1(0x3000)
else
OUT1(0x3100 | johabjamo_jongseong[c_jong])
} else {
OUT1(0x3100 |
johabjamo_jongseong[c_jong])
}
else {
if (i_jong == FILL)
OUT1(0x3100 | johabjamo_jungseong[c_jung])
OUT1(0x3100 |
johabjamo_jungseong[c_jung])
else
return 2;
}
} else {
if (i_jung == FILL) {
if (i_jong == FILL)
OUT1(0x3100 | johabjamo_choseong[c_cho])
OUT1(0x3100 |
johabjamo_choseong[c_cho])
else
return 2;
} else
}
else
OUT1(0xac00 +
i_cho * 588 +
i_jung * 28 +
......@@ -197,7 +322,8 @@ DECODER(johab)
else {
unsigned char t1, t2;
t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 2 * c - 0x197);
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
2 * c - 0x197);
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
......@@ -214,10 +340,17 @@ DECODER(johab)
#undef NONE
#undef FILL
#include "codecentry.h"
BEGIN_CODEC_REGISTRY(johab)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(johab)
/*
 * Module tables for the "kr" codec collection: ksx1001 and cp949ext
 * decode maps, the cp949 encode map, and three stateless codecs
 * (euc_kr, cp949, johab).
 * NOTE(review): the macros are defined in cjkcodecs.h, not visible here.
 */
BEGIN_MAPPINGS_LIST
MAPPING_DECONLY(ksx1001)
MAPPING_ENCONLY(cp949)
MAPPING_DECONLY(cp949ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(euc_kr)
CODEC_STATELESS(cp949)
CODEC_STATELESS(johab)
END_CODECS_LIST
I_AM_A_MODULE_FOR(kr)
/*
* _codecs_tw.c: Codecs collection for Taiwan's encodings
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _codecs_tw.c,v 1.10 2004/07/07 14:59:26 perky Exp $
*/
#include "cjkcodecs.h"
#include "mappings_tw.h"
/*
* BIG5 codec
*/
/*
 * Big5 encoder (_codecs_tw.c).
 *
 * Code points below 0x80 are written as one byte; everything else is
 * looked up in the big5 encode map and written as two bytes, high
 * byte first.
 *
 * Returns 0 when the input is exhausted, 1 when no mapping exists.
 * REQUIRE_OUTBUF / UCS4INVALID / NEXT are framework macros defined
 * elsewhere and may return early.
 */
ENCODER(big5)
{
while (inleft > 0) {
Py_UNICODE c = **inbuf;
DBCHAR code;
if (c < 0x80) {
REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c;
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * Big5 decoder (_codecs_tw.c).
 *
 * Bytes below 0x80 pass through; any other byte starts a two-byte
 * sequence looked up in the big5 decode map, with the result stored
 * directly in **outbuf.
 *
 * Returns 0 when the input is exhausted, 2 for an unmapped pair.
 */
DECODER(big5)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
}
else return 2;
}
return 0;
}
/*
* CP950 codec
*/
/*
 * CP950 encoder.
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in cp950ext first and in big5 second, so the extension table
 * wins when both have an entry.  Codes are written as two bytes, high
 * byte first.
 *
 * Returns 0 when the input is exhausted, 1 when neither map matches.
 */
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * CP950 decoder.
 *
 * Bytes below 0x80 pass through.  A two-byte sequence is tried first
 * against cp950ext and then against big5.
 *
 * Returns 0 when the input is exhausted, 2 when neither map matches.
 */
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
REQUIRE_INBUF(2)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
/*
 * Module tables for the "tw" codec collection: big5 and cp950ext maps
 * (both listed for encoding and decoding) and two stateless codecs
 * (big5, cp950).
 * NOTE(review): the macros are defined in cjkcodecs.h, not visible here.
 */
BEGIN_MAPPINGS_LIST
MAPPING_ENCDEC(big5)
MAPPING_ENCDEC(cp950ext)
END_MAPPINGS_LIST
BEGIN_CODECS_LIST
CODEC_STATELESS(big5)
CODEC_STATELESS(cp950)
END_CODECS_LIST
I_AM_A_MODULE_FOR(tw)
This diff is collapsed.
/*
* _cp932.c: the CP932 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp932.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(jisxcommon)
ENCMAP(cp932ext)
DECMAP(jisx0208)
DECMAP(cp932ext)
/*
 * CP932 encoder.
 *
 * Single-byte output:
 *   - c <= 0x80            -> the byte itself
 *   - U+FF61..U+FF9F       -> c - 0xfec0 (0xa1..0xdf)
 *   - U+F8F0               -> 0xa0
 *   - U+F8F1..U+F8F3       -> 0xfd..0xff
 * Two-byte output, tried in order:
 *   - cp932ext map: code written as-is, high byte first
 *   - jisxcommon map: JIS X 0212 entries (bit 0x8000 set) are
 *     rejected; JIS X 0208 codes are converted arithmetically to
 *     their two-byte form
 *   - U+E000..U+E757 (user-defined area): 188 characters per lead
 *     byte, lead bytes starting at 0xf0
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable
 * character.  RESERVE_OUTBUF / WRITE1 / UCS4INVALID are framework
 * macros defined elsewhere and may return early.
 */
ENCODER(cp932)
{
	while (inleft > 0) {
		Py_UNICODE c = IN1;
		DBCHAR code;
		unsigned char c1, c2;

		if (c <= 0x80) {
			WRITE1((unsigned char)c)
			NEXT(1, 1)
			continue;
		} else if (c >= 0xff61 && c <= 0xff9f) {
			WRITE1(c - 0xfec0)
			NEXT(1, 1)
			continue;
		} else if (c >= 0xf8f0 && c <= 0xf8f3) {
			/* Windows compatibility */
			RESERVE_OUTBUF(1)
			if (c == 0xf8f0)
				OUT1(0xa0)
			else
				/*
				 * Inverse of the decoder's 0xf8f1 - 0xfd + c.
				 * The previous constant 0xfef1 produced the
				 * same byte only through truncation to
				 * unsigned char.
				 */
				OUT1(c - 0xf8f1 + 0xfd)
			NEXT(1, 1)
			continue;
		}

		UCS4INVALID(c)
		RESERVE_OUTBUF(2)

		TRYMAP_ENC(cp932ext, code, c) {
			OUT1(code >> 8)
			OUT2(code & 0xff)
		} else TRYMAP_ENC(jisxcommon, code, c) {
			if (code & 0x8000) /* MSB set: JIS X 0212 */
				return 1;

			/* JIS X 0208 */
			c1 = code >> 8;
			c2 = code & 0xff;
			/* Odd rows occupy the upper half (offset 0x5e) of a lead byte. */
			c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
			c1 = (c1 - 0x21) >> 1;
			OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		} else if (c >= 0xe000 && c < 0xe758) {
			/* User-defined area */
			c1 = (Py_UNICODE)(c - 0xe000) / 188;
			c2 = (Py_UNICODE)(c - 0xe000) % 188;
			OUT1(c1 + 0xf0)
			OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
		} else
			return 1;

		NEXT(1, 2)
	}

	return 0;
}
/*
 * CP932 decoder.
 *
 * Single-byte input:
 *   - c <= 0x80      -> the same code point
 *   - 0xa0           -> U+F8F0
 *   - 0xa1..0xdf     -> 0xfec0 + c (U+FF61..U+FF9F)
 *   - 0xfd..0xff     -> U+F8F1..U+F8F3
 * Two-byte input is tried against cp932ext first; otherwise lead
 * bytes 0x81..0x9f / 0xe0..0xea are converted arithmetically to a
 * JIS X 0208 row/cell and looked up in jisx0208, and lead bytes
 * 0xf0..0xf9 map into the user-defined area starting at U+E000.
 *
 * Returns 0 when the input is exhausted, 2 for an invalid or unmapped
 * two-byte sequence.
 */
DECODER(cp932)
{
while (inleft > 0) {
unsigned char c = IN1, c2;
RESERVE_OUTBUF(1)
if (c <= 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
} else if (c >= 0xa0 && c <= 0xdf) {
if (c == 0xa0)
OUT1(0xf8f0) /* 0xa0 -> U+F8F0 */
else
OUT1(0xfec0 + c) /* half-width katakana */
NEXT(1, 1)
continue;
} else if (c >= 0xfd/* && c <= 0xff*/) {
/* Windows compatibility */
OUT1(0xf8f1 - 0xfd + c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
c2 = IN2;
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
/* Reject trail bytes outside 0x40..0x7e and 0x80..0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
return 2;
/* Convert the byte pair to the form expected by the jisx0208 map. */
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else return 2;
} else if (c >= 0xf0 && c <= 0xf9) {
/* User-defined area: 188 code points per lead byte from U+E000. */
if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc))
OUT1(0xe000 + 188 * (c - 0xf0) +
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
else
return 2;
} else
return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the cp932 codec: opens the ja_JP map and
 * imports jisx0208 (decode), cp932ext (encode and decode) and
 * jisxcommon (encode).
 * NOTE(review): macro semantics live in codecentry.h, not visible here.
 */
BEGIN_CODEC_REGISTRY(cp932)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_ENCDEC(cp932ext)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(cp932)
/*
* _cp949.c: the CP949 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp949.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(cp949)
DECMAP(ksx1001)
DECMAP(cp949ext)
/*
 * CP949 encoder (stand-alone _cp949.c variant).
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in the cp949 encode map.  The first output byte is
 * (code >> 8) | 0x80; the second is written as-is when bit 0x8000 of
 * the code is set (CP949 extension) and OR-ed with 0x80 otherwise
 * (KS X 1001).
 *
 * Returns 0 when the input is exhausted, 1 when no mapping exists.
 */
ENCODER(cp949)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c);
else return 1;
OUT1((code >> 8) | 0x80)
if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */
else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2)
}
return 0;
}
/*
 * CP949 decoder (stand-alone _cp949.c variant).
 *
 * Bytes below 0x80 pass through.  A two-byte sequence is tried first
 * against ksx1001 (both bytes XOR 0x80), then against cp949ext (raw
 * bytes).
 *
 * Returns 0 when the input is exhausted, 2 when neither map matches.
 */
DECODER(cp949)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the cp949 codec: opens the ko_KR map and
 * imports ksx1001 and cp949ext (decode) and cp949 (encode).
 * NOTE(review): macro semantics live in codecentry.h, not visible here.
 */
BEGIN_CODEC_REGISTRY(cp949)
MAPOPEN(ko_KR)
IMPORTMAP_DEC(ksx1001)
IMPORTMAP_DEC(cp949ext)
IMPORTMAP_ENC(cp949)
MAPCLOSE()
END_CODEC_REGISTRY(cp949)
/*
* _cp950.c: the CP950 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _cp950.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(big5)
ENCMAP(cp950ext)
DECMAP(big5)
DECMAP(cp950ext)
/*
 * CP950 encoder (stand-alone _cp950.c variant).
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in cp950ext first and in big5 second.  Codes are written as two
 * bytes, high byte first.
 *
 * Returns 0 when the input is exhausted, 1 when neither map matches.
 */
ENCODER(cp950)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
RESERVE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c);
else return 1;
OUT1(code >> 8)
OUT2(code & 0xFF)
NEXT(1, 2)
}
return 0;
}
/*
 * CP950 decoder (stand-alone _cp950.c variant).
 *
 * Bytes below 0x80 pass through.  A two-byte sequence is tried first
 * against cp950ext and then against big5.
 *
 * Returns 0 when the input is exhausted, 2 when neither map matches.
 */
DECODER(cp950)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
RESERVE_INBUF(2)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
else TRYMAP_DEC(big5, **outbuf, c, IN2);
else return 2;
NEXT(2, 1)
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the cp950 codec: opens the zh_TW map and
 * imports big5 and cp950ext for both encoding and decoding.
 * NOTE(review): macro semantics live in codecentry.h, not visible here.
 */
BEGIN_CODEC_REGISTRY(cp950)
MAPOPEN(zh_TW)
IMPORTMAP_ENCDEC(big5)
IMPORTMAP_ENCDEC(cp950ext)
MAPCLOSE()
END_CODEC_REGISTRY(cp950)
/*
* _euc_jisx0213.c: the EUC-JISX0213 codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _euc_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
*/
#define USING_BINARY_PAIR_SEARCH
#include "codeccommon.h"
#include "map_jisx0213_pairs.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(jisx0213_bmp)
DECMAP(jisx0213_1_bmp)
DECMAP(jisx0213_2_bmp)
ENCMAP(jisx0213_emp)
DECMAP(jisx0213_1_emp)
DECMAP(jisx0213_2_emp)
/*
 * Base of the non-BMP plane handled by the *_emp maps: the encoder
 * accepts only characters with c >> 16 == EMPBASE >> 16 there, and the
 * decoder ORs EMPBASE onto the 16-bit values those maps return.
 */
#define EMPBASE 0x20000
/*
 * EUC-JISX0213 encoder.
 *
 * Code points below 0x80 are written as one byte.  BMP characters are
 * tried against jisx0213_bmp (which may answer MULTIC, meaning the
 * result depends on the following character and must be resolved via
 * find_pairencmap), then jisxcommon, then a few special cases.
 * Characters in the EMPBASE plane are looked up in jisx0213_emp by
 * their low 16 bits.
 *
 * Output: codes with bit 0x8000 set are written as three bytes
 * starting with 0x8f; other codes as two bytes, each OR-ed with 0x80.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable BMP
 * character, `insize` for an unencodable non-BMP character, and
 * MBERR_TOOFEW when a MULTIC character is the last input character
 * and MBENC_FLUSH is not set in `flags`.
 */
ENCODER(euc_jisx0213)
{
while (inleft > 0) {
ucs4_t c = IN1;
DBCHAR code;
int insize;
if (c < 0x80) {
WRITE1(c)
NEXT(1, 1)
continue;
}
DECODE_SURROGATE(c)
insize = GET_INSIZE(c);
if (c <= 0xFFFF) {
/* try 0213 first because it might have MULTIC */
TRYMAP_ENC(jisx0213_bmp, code, c) {
if (code == MULTIC) {
if (inleft < 2) {
/* No following character: encode c alone only when flushing. */
if (flags & MBENC_FLUSH) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
return MBERR_TOOFEW;
} else {
/* Try the (c, next) pair first, then fall back to c alone. */
code = find_pairencmap((ucs2_t)c, (*inbuf)[1],
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV) {
code = find_pairencmap((ucs2_t)c, 0,
jisx0213_pairencmap, JISX0213_ENCPAIRS);
if (code == DBCINV)
return 1;
} else
insize = 2;
}
}
} else TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
} else if (c == 0xff3c)
/* F/W REVERSE SOLIDUS (see NOTES.euc-jisx0213) */
code = 0x2140;
else if (c == 0xff5e)
/* F/W TILDE (see NOTES.euc-jisx0213) */
code = 0x2232;
else
return 1;
} else if (c >> 16 == EMPBASE >> 16) {
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
else return insize;
} else
return insize;
if (code & 0x8000) {
/* Codeset 2 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(insize, 3)
} else {
/* Codeset 1 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(insize, 2)
}
}
return 0;
}
/*
 * EUC-JISX0213 decoder.
 *
 * Bytes below 0x80 pass through.  Lead byte 0x8e introduces a
 * two-byte half-width katakana sequence; lead byte 0x8f introduces a
 * three-byte sequence tried against jisx0213_2_bmp, jisx0213_2_emp
 * and jisx0212; any other lead byte starts a two-byte sequence tried
 * against two hard-coded pairs, jisx0208, jisx0213_1_bmp,
 * jisx0213_1_emp and jisx0213_pair.  *_emp results are OR-ed with
 * EMPBASE; jisx0213_pair results are split into two output units.
 *
 * Returns 0 when the input is exhausted, 2 or 3 (the sequence length)
 * when a sequence cannot be decoded.
 */
DECODER(euc_jisx0213)
{
while (inleft > 0) {
unsigned char c = IN1;
ucs4_t code;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
} else
return 2;
} else if (c == 0x8f) {
unsigned char c2, c3;
RESERVE_INBUF(3)
c2 = IN2 ^ 0x80;
c3 = IN3 ^ 0x80;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
PUTUCS4(EMPBASE | code)
NEXT_IN(3)
continue;
} else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
else return 3;
NEXT(3, 1)
} else {
unsigned char c2;
RESERVE_INBUF(2)
c ^= 0x80;
c2 = IN2 ^ 0x80;
/* JIS X 0213 Plane 1 */
/* The two hard-coded pairs mirror the encoder's 0xff3c / 0xff5e cases. */
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
PUTUCS4(EMPBASE | code)
NEXT_IN(2)
continue;
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
/* Pair entries pack two 16-bit units: high half first, then low half. */
WRITE2(code >> 16, code & 0xffff)
NEXT(2, 2)
continue;
} else return 2;
NEXT(2, 1)
}
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the euc_jisx0213 codec: opens the ja_JP map
 * and imports the encode maps (jisxcommon, jisx0213_bmp, jisx0213_emp)
 * and decode maps (jisx0208, jisx0212, jisx0213_{1,2}_{bmp,emp}).
 * NOTE(review): macro semantics live in codecentry.h, not visible here.
 */
BEGIN_CODEC_REGISTRY(euc_jisx0213)
MAPOPEN(ja_JP)
IMPORTMAP_ENC(jisxcommon)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisx0213_bmp)
IMPORTMAP_DEC(jisx0213_1_bmp)
IMPORTMAP_DEC(jisx0213_2_bmp)
IMPORTMAP_ENC(jisx0213_emp)
IMPORTMAP_DEC(jisx0213_1_emp)
IMPORTMAP_DEC(jisx0213_2_emp)
MAPCLOSE()
END_CODEC_REGISTRY(euc_jisx0213)
/*
* _euc_jp.c: the EUC-JP codec
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: _euc_jp.c,v 1.5 2003/12/31 05:46:55 perky Exp $
*/
#include "codeccommon.h"
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
/*
 * EUC-JP encoder.
 *
 * Code points below 0x80 are written as one byte.  Others are looked
 * up in jisxcommon; U+FF61..U+FF9F are written as 0x8e followed by
 * c - 0xfec0.  Unless STRICT_BUILD is defined, three extra characters
 * are accepted: U+FF3C (as code 0x2140), U+00A5 (as byte 0x5c) and
 * U+203E (as byte 0x7e).
 *
 * Output: codes with bit 0x8000 set (JIS X 0212) are written as three
 * bytes starting with 0x8f; other codes (JIS X 0208) as two bytes,
 * each OR-ed with 0x80.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable
 * character.
 */
ENCODER(euc_jp)
{
while (inleft > 0) {
Py_UNICODE c = IN1;
DBCHAR code;
if (c < 0x80) {
WRITE1((unsigned char)c)
NEXT(1, 1)
continue;
}
UCS4INVALID(c)
TRYMAP_ENC(jisxcommon, code, c);
else if (c >= 0xff61 && c <= 0xff9f) {
/* JIS X 0201 half-width katakana */
WRITE2(0x8e, c - 0xfec0)
NEXT(1, 2)
continue;
}
#ifndef STRICT_BUILD
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
code = 0x2140;
else if (c == 0xa5) { /* YEN SIGN */
WRITE1(0x5c);
NEXT(1, 1)
continue;
} else if (c == 0x203e) { /* OVERLINE */
WRITE1(0x7e);
NEXT(1, 1)
continue;
}
#endif
else
return 1;
if (code & 0x8000) {
/* JIS X 0212 */
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
NEXT(1, 3)
} else {
/* JIS X 0208 */
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
NEXT(1, 2)
}
}
return 0;
}
/*
 * EUC-JP decoder.
 *
 * Bytes below 0x80 pass through.  Lead byte 0x8e introduces a
 * two-byte half-width katakana sequence (trail 0xa1..0xdf mapped to
 * 0xfec0 + trail).  Lead byte 0x8f introduces a three-byte JIS X 0212
 * sequence.  Any other lead byte starts a two-byte JIS X 0208
 * sequence; unless STRICT_BUILD is defined, the pair A1 C0 is decoded
 * to U+FF3C before the table is consulted.
 *
 * Returns 0 when the input is exhausted, 2 or 3 (the sequence length)
 * when a sequence cannot be decoded.
 */
DECODER(euc_jp)
{
while (inleft > 0) {
unsigned char c = IN1;
RESERVE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)
NEXT(2, 1)
} else
return 2;
} else if (c == 0x8f) {
unsigned char c2, c3;
RESERVE_INBUF(3)
c2 = IN2;
c3 = IN3;
/* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
NEXT(3, 1)
} else
return 3;
} else {
unsigned char c2;
RESERVE_INBUF(2)
c2 = IN2;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;
else
#endif
TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ;
else return 2;
NEXT(2, 1)
}
}
return 0;
}
#include "codecentry.h"
/*
 * Registration block for the euc_jp codec: opens the ja_JP map and
 * imports jisx0208 and jisx0212 (decode) and jisxcommon (encode).
 * NOTE(review): macro semantics live in codecentry.h, not visible here.
 */
BEGIN_CODEC_REGISTRY(euc_jp)
MAPOPEN(ja_JP)
IMPORTMAP_DEC(jisx0208)
IMPORTMAP_DEC(jisx0212)
IMPORTMAP_ENC(jisxcommon)
MAPCLOSE()
END_CODEC_REGISTRY(euc_jp)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* alg_iso8859_1.h: Encoder/Decoder macros for ISO8859-1
*
* Written by Hye-Shik Chang <perky@FreeBSD.org>
* $CJKCodecs: alg_iso8859_1.h,v 1.3 2003/12/31 05:46:55 perky Exp $
*/
/*
 * ISO8859_1_ENCODE(c, assi): if c <= 0xff, assign c to `assi`.
 * Expands to an `if` statement ending in ';', so callers can continue
 * it with `else ...` for values that do not fit.
 */
#define ISO8859_1_ENCODE(c, assi) \
if ((c) <= 0xff) (assi) = (c);
/*
 * ISO8859_1_DECODE(c, assi): always assigns c to `assi`.  The range
 * check is commented out and replaced by a constant-true condition,
 * which keeps the same `if (...) ...;` shape as ISO8859_1_ENCODE so
 * callers can still chain an `else`.
 */
#define ISO8859_1_DECODE(c, assi) \
if (1/*(c) <= 0xff*/) (assi) = (c);
This diff is collapsed.
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2003/11/27 16:42:20 perky Exp $ */
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2004/06/29 05:42:08 perky Exp $ */
#define JISX0201_R_ENCODE(c, assi) \
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
......@@ -24,4 +24,3 @@
#define JISX0201_DECODE(c, assi) \
JISX0201_R_DECODE(c, assi) \
else JISX0201_K_DECODE(c, assi)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment