Commit 4dfcb1a0 authored by Benjamin Peterson

merge heads

parents 48deae12 ab5fcc00
......@@ -58,11 +58,16 @@ class TestBase:
result = func(source, scheme)[0]
if func is self.decode:
self.assertTrue(type(result) is str, type(result))
self.assertEqual(result, expected,
'%a.decode(%r, %r)=%a != %a'
% (source, self.encoding, scheme, result,
expected))
else:
self.assertTrue(type(result) is bytes, type(result))
self.assertEqual(result, expected,
'%a.decode(%r)=%a != %a'
% (source, self.encoding, result, expected))
self.assertEqual(result, expected,
'%a.encode(%r, %r)=%a != %a'
% (source, self.encoding, scheme, result,
expected))
else:
self.assertRaises(UnicodeError, func, source, scheme)
......@@ -279,6 +284,7 @@ class TestBase_Mapping(unittest.TestCase):
pass_enctest = []
pass_dectest = []
supmaps = []
codectests = []
def __init__(self, *args, **kw):
unittest.TestCase.__init__(self, *args, **kw)
......@@ -348,6 +354,30 @@ class TestBase_Mapping(unittest.TestCase):
if (csetch, unich) not in self.pass_dectest:
self.assertEqual(str(csetch, self.encoding), unich)
def test_errorhandle(self):
for source, scheme, expected in self.codectests:
if isinstance(source, bytes):
func = source.decode
else:
func = source.encode
if expected:
if isinstance(source, bytes):
result = func(self.encoding, scheme)
self.assertTrue(type(result) is str, type(result))
self.assertEqual(result, expected,
'%a.decode(%r, %r)=%a != %a'
% (source, self.encoding, scheme, result,
expected))
else:
result = func(self.encoding, scheme)
self.assertTrue(type(result) is bytes, type(result))
self.assertEqual(result, expected,
'%a.encode(%r, %r)=%a != %a'
% (source, self.encoding, scheme, result,
expected))
else:
self.assertRaises(UnicodeError, func, self.encoding, scheme)
def load_teststring(name):
dir = os.path.join(os.path.dirname(__file__), 'cjkencodings')
with open(os.path.join(dir, name + '.txt'), 'rb') as f:
......
......@@ -115,55 +115,56 @@ DECODER(big5hkscs)
REQUIRE_INBUF(2)
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
goto hkscsdec;
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
continue;
}
}
TRYMAP_DEC(big5hkscs, decoded, c, IN2)
{
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
TRYMAP_DEC(big5, **outbuf, c, IN2) {
NEXT(2, 1)
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
continue;
}
else
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
int s = BH2S(c, IN2);
const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0;
s -= BH2S(0x87, 0x40);
}
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
hintbase = big5hkscs_phint_12130;
s -= BH2S(0xc6, 0xa1);
}
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
hintbase = big5hkscs_phint_21924;
s -= BH2S(0xf9, 0xd6);
}
else
return MBERR_INTERNAL;
if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)
NEXT_IN(2)
}
else {
OUT1(decoded)
NEXT(2, 1)
}
}
else {
switch ((c << 8) | IN2) {
case 0x8862: WRITE2(0x00ca, 0x0304); break;
case 0x8864: WRITE2(0x00ca, 0x030c); break;
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
default: return 2;
}
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
switch ((c << 8) | IN2) {
case 0x8862: WRITE2(0x00ca, 0x0304); break;
case 0x8864: WRITE2(0x00ca, 0x030c); break;
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
default: return 2;
}
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
}
return 0;
......
......@@ -371,11 +371,11 @@ DECODER(euc_jp)
REQUIRE_OUTBUF(1)
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c < 0x80) {
OUT1(c)
NEXT(1, 1)
continue;
}
if (c == 0x8e) {
/* JIS X 0201 half-width katakana */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment