Commit 1889c4cb authored by Xiang Zhang, committed by GitHub

bpo-29456: Fix bugs in unicodedata.normalize: u1176, u11a7 and u11c3 (GH-1958) (GH-7704)

The Hangul composition boundary checks are wrong for the second character
(the accepted range should be [0x1161, 0x1176) instead of [0x1161, 0x1176]) and for the
third character (it should be (0x11A7, 0x11C3) instead of [0x11A7, 0x11C3]).
(cherry picked from commit d134809c)
Co-authored-by: Wonsup Yoon <pusnow@me.com>
parent fc8ea20c
......@@ -204,6 +204,19 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
b = u'C\u0338' * 20 + u'\xC7'
self.assertEqual(self.db.normalize('NFC', a), b)
def test_issue29456(self):
# Regression test for bpo-29456: NFC Hangul composition used inclusive
# bounds and so accepted jamo just outside the modern vowel and modern
# trailing consonant ranges.
# Each *_a string is the input; the matching *_b string is the expected
# NFC result.
# U+1176 is one past the modern vowels (U+1161..U+1175), so nothing may
# compose and the sequence must come back unchanged.
u1176_str_a = u'\u1100\u1176\u11a8'
u1176_str_b = u'\u1100\u1176\u11a8'
# U+11A7 sits just below the modern trailing consonants (U+11A8..U+11C2):
# only the leading U+1100 U+1175 pair composes (to U+AE30) and U+11A7 is
# left as a separate code point.
u11a7_str_a = u'\u1100\u1175\u11a7'
u11a7_str_b = u'\uae30\u11a7'
# U+11C3 sits just above the modern trailing consonants: again only the
# leading pair composes and U+11C3 is left untouched.
u11c3_str_a = u'\u1100\u1175\u11c3'
u11c3_str_b = u'\uae30\u11c3'
self.assertEqual(self.db.normalize('NFC', u1176_str_a), u1176_str_b)
self.assertEqual(self.db.normalize('NFC', u11a7_str_a), u11a7_str_b)
self.assertEqual(self.db.normalize('NFC', u11c3_str_a), u11c3_str_b)
def test_east_asian_width(self):
eaw = self.db.east_asian_width
self.assertRaises(TypeError, eaw, 'a')
......
......@@ -1578,6 +1578,7 @@ Jason Yeo
EungJun Yi
Bob Yodlowski
Danny Yoo
Wonsup Yoon
Rory Yorke
George Yoshida
Kazuhiro Yoshida
......
Fix bugs in hangul normalization: u1176, u11a7 and u11c3
......@@ -664,14 +664,18 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
pairs, since we always have decomposed data. */
if (LBase <= *i && *i < (LBase+LCount) &&
i + 1 < end &&
VBase <= i[1] && i[1] <= (VBase+VCount)) {
VBase <= i[1] && i[1] < (VBase+VCount)) {
/* check L character is a modern leading consonant (0x1100 ~ 0x1112)
and V character is a modern vowel (0x1161 ~ 0x1175). */
int LIndex, VIndex;
LIndex = i[0] - LBase;
VIndex = i[1] - VBase;
code = SBase + (LIndex*VCount+VIndex)*TCount;
i+=2;
if (i < end &&
TBase <= *i && *i <= (TBase+TCount)) {
TBase < *i && *i < (TBase+TCount)) {
/* check T character is a modern trailing consonant
(0x11A8 ~ 0x11C2). */
code += *i-TBase;
i++;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment