Commit fad73711 authored by Alexander Barkov's avatar Alexander Barkov

Merging xxx_unicode_520_ci and xxx_vietnamese_ci from MySQL-5.6.

parent 55de9b04
#
# WL#2673 Unicode collation algorithm new version
#
CREATE TABLE t1 AS SELECT repeat('a', 10) as c LIMIT 0;
SHOW CREATE TABLE t1;
#
# Unicode-5.0.0 characters
#
# Latin Extended-B and IP extensions
INSERT INTO t1 VALUES (_utf32 0x0180),(_utf32 0x023A);
INSERT INTO t1 VALUES (_utf32 0x023B),(_utf32 0x023C);
INSERT INTO t1 VALUES (_utf32 0x023D),(_utf32 0x023E);
INSERT INTO t1 VALUES (_utf32 0x0241),(_utf32 0x0242);
INSERT INTO t1 VALUES (_utf32 0x0243),(_utf32 0x0244);
INSERT INTO t1 VALUES (_utf32 0x0245),(_utf32 0x0246);
INSERT INTO t1 VALUES (_utf32 0x0247),(_utf32 0x0248);
INSERT INTO t1 VALUES (_utf32 0x0249),(_utf32 0x024A);
INSERT INTO t1 VALUES (_utf32 0x024B),(_utf32 0x024C);
INSERT INTO t1 VALUES (_utf32 0x024D),(_utf32 0x024E);
INSERT INTO t1 VALUES (_utf32 0x024F),(_utf32 0x026B);
INSERT INTO t1 VALUES (_utf32 0x027D),(_utf32 0x0289);
INSERT INTO t1 VALUES (_utf32 0x028C);
# Greek and Coptic
INSERT INTO t1 VALUES (_utf32 0x037B), (_utf32 0x037C);
INSERT INTO t1 VALUES (_utf32 0x037D), (_utf32 0x03FD);
INSERT INTO t1 VALUES (_utf32 0x03FE), (_utf32 0x03FF);
# Cyrillic
INSERT INTO t1 VALUES (_utf32 0x04C0), (_utf32 0x04CF);
INSERT INTO t1 VALUES (_utf32 0x04F6), (_utf32 0x04F7);
INSERT INTO t1 VALUES (_utf32 0x04FA), (_utf32 0x04FB);
INSERT INTO t1 VALUES (_utf32 0x04FC), (_utf32 0x04FD);
INSERT INTO t1 VALUES (_utf32 0x04FE), (_utf32 0x04FF);
INSERT INTO t1 VALUES (_utf32 0x0510), (_utf32 0x0511);
INSERT INTO t1 VALUES (_utf32 0x0512), (_utf32 0x0513);
# Georgian, Georgian Supplement
INSERT INTO t1 VALUES (_utf32 0x10A0), (_utf32 0x10A1);
INSERT INTO t1 VALUES (_utf32 0x10A2), (_utf32 0x10A3);
INSERT INTO t1 VALUES (_utf32 0x10A4), (_utf32 0x10A5);
INSERT INTO t1 VALUES (_utf32 0x10A6), (_utf32 0x10A7);
INSERT INTO t1 VALUES (_utf32 0x2D00), (_utf32 0x2D01);
INSERT INTO t1 VALUES (_utf32 0x2D02), (_utf32 0x2D03);
INSERT INTO t1 VALUES (_utf32 0x2D04), (_utf32 0x2D05);
INSERT INTO t1 VALUES (_utf32 0x2D06), (_utf32 0x2D07);
# Phonetic Extensions
INSERT INTO t1 VALUES (_utf32 0x1D7D);
# Letterlike Symbols
INSERT INTO t1 VALUES (_utf32 0x2132),(_utf32 0x214E);
# Number Forms
INSERT INTO t1 VALUES (_utf32 0x2183),(_utf32 0x2184);
# Coptic
INSERT INTO t1 VALUES (_utf32 0x2C80), (_utf32 0x2C81);
INSERT INTO t1 VALUES (_utf32 0x2C82), (_utf32 0x2C83);
INSERT INTO t1 VALUES (_utf32 0x2C84), (_utf32 0x2C85);
INSERT INTO t1 VALUES (_utf32 0x2C86), (_utf32 0x2C87);
INSERT INTO t1 VALUES (_utf32 0x2C88), (_utf32 0x2C89);
INSERT INTO t1 VALUES (_utf32 0x2C8A), (_utf32 0x2C8B);
INSERT INTO t1 VALUES (_utf32 0x2C8C), (_utf32 0x2C8D);
INSERT INTO t1 VALUES (_utf32 0x2C8E), (_utf32 0x2C8F);
# Latin Extended-C
INSERT INTO t1 VALUES (_utf32 0x2C60), (_utf32 0x2C61);
INSERT INTO t1 VALUES (_utf32 0x2C62), (_utf32 0x2C63);
INSERT INTO t1 VALUES (_utf32 0x2C64), (_utf32 0x2C65);
INSERT INTO t1 VALUES (_utf32 0x2C66), (_utf32 0x2C67);
INSERT INTO t1 VALUES (_utf32 0x2C68), (_utf32 0x2C69);
INSERT INTO t1 VALUES (_utf32 0x2C6A), (_utf32 0x2C6B);
INSERT INTO t1 VALUES (_utf32 0x2C6C), (_utf32 0x2C75);
INSERT INTO t1 VALUES (_utf32 0x2C76);
# Glagolitic
INSERT INTO t1 VALUES (_utf32 0x2C00), (_utf32 0x2C01);
INSERT INTO t1 VALUES (_utf32 0x2C02), (_utf32 0x2C03);
INSERT INTO t1 VALUES (_utf32 0x2C04), (_utf32 0x2C05);
INSERT INTO t1 VALUES (_utf32 0x2C06), (_utf32 0x2C07);
INSERT INTO t1 VALUES (_utf32 0x2C30), (_utf32 0x2C31);
INSERT INTO t1 VALUES (_utf32 0x2C32), (_utf32 0x2C33);
INSERT INTO t1 VALUES (_utf32 0x2C34), (_utf32 0x2C35);
INSERT INTO t1 VALUES (_utf32 0x2C36), (_utf32 0x2C37);
# Deseret
INSERT INTO t1 VALUES (_utf32 0x10400), (_utf32 0x10401);
INSERT INTO t1 VALUES (_utf32 0x10402), (_utf32 0x10403);
INSERT INTO t1 VALUES (_utf32 0x10404), (_utf32 0x10405);
INSERT INTO t1 VALUES (_utf32 0x10406), (_utf32 0x10407);
INSERT INTO t1 VALUES (_utf32 0x10428), (_utf32 0x10429);
INSERT INTO t1 VALUES (_utf32 0x1042A), (_utf32 0x1042B);
INSERT INTO t1 VALUES (_utf32 0x1042C), (_utf32 0x1042D);
INSERT INTO t1 VALUES (_utf32 0x1042E), (_utf32 0x1042F);
#
# Unicode 5.1.0 characters
#
INSERT INTO t1 VALUES (_utf32 0x0370); # GREEK CAPITAL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0371); # GREEK SMALL LETTER HETA
INSERT INTO t1 VALUES (_utf32 0x0372); # GREEK CAPITAL LETTER ARCHAIC SAMPI
INSERT INTO t1 VALUES (_utf32 0x0373); # GREEK SMALL LETTER ARCHAIC SAMPI
INSERT INTO t1 VALUES (_utf32 0x0514); # CYRILLIC CAPITAL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0515); # CYRILLIC SMALL LETTER LHA
INSERT INTO t1 VALUES (_utf32 0x0516); # CYRILLIC CAPITAL LETTER RHA
INSERT INTO t1 VALUES (_utf32 0x0517); # CYRILLIC SMALL LETTER RHA
INSERT INTO t1 VALUES (_utf32 0xA640); # CYRILLIC CAPITAL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA641); # CYRILLIC SMALL LETTER ZEMLYA
INSERT INTO t1 VALUES (_utf32 0xA642); # CYRILLIC CAPITAL LETTER DZELO
INSERT INTO t1 VALUES (_utf32 0xA643); # CYRILLIC SMALL LETTER DZELO
INSERT INTO t1 VALUES (_utf32 0xA722); # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA723); # LATIN SMALL LETTER EGYPTOLOGICAL ALEF
INSERT INTO t1 VALUES (_utf32 0xA724); # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
INSERT INTO t1 VALUES (_utf32 0xA725); # LATIN SMALL LETTER EGYPTOLOGICAL AIN
INSERT INTO t1 VALUES (_utf32 0xA726); # LATIN CAPITAL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA727); # LATIN SMALL LETTER HENG
INSERT INTO t1 VALUES (_utf32 0xA728); # LATIN CAPITAL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA729); # LATIN SMALL LETTER TZ
INSERT INTO t1 VALUES (_utf32 0xA72A); # LATIN CAPITAL LETTER TRESILLO
INSERT INTO t1 VALUES (_utf32 0xA72B); # LATIN SMALL LETTER TRESILLO
#
# Unicode 5.2.0 characters
#
INSERT INTO t1 VALUES (_utf32 0x2CEB); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI;Lu;0;L;;;;;N;;;;2CEC;
INSERT INTO t1 VALUES (_utf32 0x2CEC); # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI;Ll;0;L;;;;;N;;;2CEB;;2CEB
INSERT INTO t1 VALUES (_utf32 0x2CED); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA;Lu;0;L;;;;;N;;;;2CEE;
INSERT INTO t1 VALUES (_utf32 0x2CEE); # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA;Ll;0;L;;;;;N;;;2CED;;2CED
#
# Check case folding and UCA weights
#
SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c
FROM t1 ORDER BY c, BINARY c;
#
# Check that LIKE works fine with and without index.
# This test makes sure that cs->min_sort_char and cs->max_sort_char
# are set properly
# Also check that LIKE is case insensitive for supplementary characters
#
INSERT INTO t1 VALUES ('a');
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0xFFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10FFFF));
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10400));
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;
ALTER TABLE t1 ADD KEY(c);
EXPLAIN SELECT hex(c) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c;
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c;
DROP TABLE t1;
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -2467,6 +2467,215 @@ ZzŹźŻżŽž
ǁ
ǂ
ǃ
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_520_ci;
GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '')
÷
×
AaÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặ
AAAaaAaa
AEAeaEaeÆæǢǣǼǽ
Bb
ƀ
Ɓ
Ƃƃ
CcÇçĆćĈĉĊċČč
CHChcHch
Ƈƈ
DdÐðĎďĐđ
DZDzDŽDždZdzdŽdžDŽDždžDZDzdz
Ɖ
Ɗ
Ƌƌ
EeÈÉÊËèéêëĒēĔĕĖėĘęĚěẸẹẺẻẼẽẾếỀềỂểỄễỆệ
Ǝǝ
Ə
Ɛ
Ff
Ƒƒ
GgĜĝĞğĠġĢģǦǧǴǵ
Ǥǥ
Ɠ
Ɣ
Ƣƣ
HhĤĥĦħ
ƕǶ
IiÌÍÎÏìíîïĨĩĪīĬĭĮįİǏǐỈỉỊị
IJIjiJijIJij
ı
Ɨ
Ɩ
JjĴĵǰ
KkĶķǨǩ
Ƙƙ
LlĹĺĻļĽľĿŀŁł
LJLjlJljLJLjlj
LLLllLll
ƚ
ƛ
Mm
NnÑñŃńŅņŇňǸǹ
NJNjnJnjNJNjnj
Ɲ
ƞ
Ŋŋ
OoÒÓÔÕÖØòóôõöøŌōŎŏŐőƠơǑǒǪǫǬǭǾǿỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợ
OEOeoEoeŒœ
Ɔ
Ɵ
Pp
Ƥƥ
Qq
ĸ
RrŔŕŖŗŘř
RRRrrRrr
Ʀ
SsŚśŜŝŞşŠšſ
SSSssSssß
Ʃ
ƪ
TtŢţŤť
ƾ
Ŧŧ
ƫ
Ƭƭ
Ʈ
UuÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưǓǔǕǖǗǘǙǚǛǜỤụỦủỨứỪừỬửỮữỰự
Ɯ
Ʊ
Vv
Ʋ
WwŴŵ
Xx
YyÝýÿŶŷŸ
Ƴƴ
ZzŹźŻżŽž
ƍ
Ƶƶ
ƷǮǯ
Ƹƹ
ƺ
Þþ
ƿǷ
ƻ
Ƨƨ
Ƽƽ
Ƅƅ
ʼn
ǀ
ǁ
ǂ
ǃ
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_vietnamese_ci;
GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '')
÷
×
AaÀÁÃÄÅàáãäåĀāĄąǍǎǞǟǠǡǺǻẠạẢả
AAAaaAaa
AEAeaEae
ĂăẮắẰằẲẳẴẵẶặ
ÂâẤấẦầẨẩẪẫẬậ
ÆæǢǣǼǽ
Bb
ƀ
Ɓ
Ƃƃ
CcÇçĆćĈĉĊċČč
CHChcHch
Ƈƈ
DdĎď
DZDzDŽDždZdzdŽdžDŽDždžDZDzdz
Đđ
Ɖ
Ɗ
Ƌƌ
Ðð
EeÈÉËèéëĒēĔĕĖėĘęĚěẸẹẺẻẼẽ
ÊêẾếỀềỂểỄễỆệ
Ǝǝ
Ə
Ɛ
Ff
Ƒƒ
GgĜĝĞğĠġĢģǦǧǴǵ
Ǥǥ
Ɠ
Ɣ
Ƣƣ
HhĤĥ
ƕǶ
Ħħ
IiÌÍÎÏìíîïĨĩĪīĬĭĮįİǏǐỈỉỊị
IJIjiJijIJij
ı
Ɨ
Ɩ
JjĴĵǰ
KkĶķǨǩ
Ƙƙ
LlĹĺĻļĽľ
Ŀŀ
LJLjlJljLJLjlj
LLLllLll
Łł
ƚ
ƛ
Mm
NnÑñŃńŅņŇňǸǹ
NJNjnJnjNJNjnj
Ɲ
ƞ
Ŋŋ
OoÒÓÕÖòóõöŌōŎŏŐőǑǒǪǫǬǭỌọỎỏ
OEOeoEoeŒœ
ÔôỐốỒồỔổỖỗỘộ
ƠơỚớỜờỞởỠỡỢợ
ØøǾǿ
Ɔ
Ɵ
Pp
Ƥƥ
Qq
ĸ
RrŔŕŖŗŘř
RRRrrRrr
Ʀ
SsŚśŜŝŞşŠšſ
SSSssSssß
Ʃ
ƪ
TtŢţŤť
ƾ
Ŧŧ
ƫ
Ƭƭ
Ʈ
UuÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųǓǔǕǖǗǘǙǚǛǜỤụỦủ
ƯưỨứỪừỬửỮữỰự
Ɯ
Ʊ
Vv
Ʋ
WwŴŵ
Xx
YyÝýÿŶŷŸ
Ƴƴ
ZzŹźŻżŽž
ƍ
Ƶƶ
ƷǮǯ
Ƹƹ
ƺ
Þþ
ƿǷ
ƻ
Ƨƨ
Ƽƽ
Ƅƅ
ʼn
ǀ
ǁ
ǂ
ǃ
DROP TABLE t1;
#
# Start of 5.5 tests
......
......@@ -140,7 +140,7 @@
</rules>
</collation>
<collation name="utf8_maxuserid_ci" id="2047">
<collation name="utf8_maxuserid_ci" id="2047" version="4.0.0">
<rules>
<reset>a</reset>
<s>b</s>
......@@ -150,10 +150,20 @@
<charset name="utf8mb4">
<collation name="utf8mb4_test_ci" id="326">
<collation name="utf8mb4_test_ci" id="326" version="5.2.0">
<rules>
<reset>a</reset>
<s>b</s>
<!-- check shift for a supplementary character -->
<i>\u10062</i>
<!-- check reset to a supplementaty character-->
<reset>\u10400</reset>
<i>\u100400</i>
<!-- check contractions with non-ascii characters -->
<reset>d</reset>
<p>d\u017e</p>
<t>D\u017e</t>
<t>D\u017d</t>
</rules>
</collation>
<collation name="utf8mb4_test_400_ci" id="328" version="4.0.0">
......
......@@ -33,6 +33,7 @@ latin7 ISO 8859-13 Baltic latin7_general_ci 1
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
cp1251 Windows Cyrillic cp1251_general_ci 1
utf16 UTF-16 Unicode utf16_general_ci 4
utf16le UTF-16LE Unicode utf16le_general_ci 4
cp1256 Windows Arabic cp1256_general_ci 1
cp1257 Windows Baltic cp1257_general_ci 1
utf32 UTF-32 Unicode utf32_general_ci 4
......
......@@ -79,6 +79,8 @@ utf8_hungarian_ci utf8 210 # #
utf8_sinhala_ci utf8 211 # #
utf8_german2_ci utf8 212 # #
utf8_croatian_mysql561_ci utf8 213 # #
utf8_unicode_520_ci utf8 214 # #
utf8_vietnamese_ci utf8 215 # #
utf8_general_mysql500_ci utf8 223 # #
utf8_croatian_ci utf8 576 # #
ucs2_general_ci ucs2 35 Yes # #
......@@ -105,6 +107,8 @@ ucs2_hungarian_ci ucs2 146 # #
ucs2_sinhala_ci ucs2 147 # #
ucs2_german2_ci ucs2 148 # #
ucs2_croatian_mysql561_ci ucs2 149 # #
ucs2_unicode_520_ci ucs2 150 # #
ucs2_vietnamese_ci ucs2 151 # #
ucs2_general_mysql500_ci ucs2 159 # #
ucs2_croatian_ci ucs2 640 # #
cp866_general_ci cp866 36 Yes # #
......@@ -145,6 +149,8 @@ utf8mb4_hungarian_ci utf8mb4 242 # #
utf8mb4_sinhala_ci utf8mb4 243 # #
utf8mb4_german2_ci utf8mb4 244 # #
utf8mb4_croatian_mysql561_ci utf8mb4 245 # #
utf8mb4_unicode_520_ci utf8mb4 246 # #
utf8mb4_vietnamese_ci utf8mb4 247 # #
utf8mb4_croatian_ci utf8mb4 608 # #
cp1251_bulgarian_ci cp1251 14 # #
cp1251_ukrainian_ci cp1251 23 # #
......@@ -175,6 +181,8 @@ utf16_hungarian_ci utf16 119 # #
utf16_sinhala_ci utf16 120 # #
utf16_german2_ci utf16 121 # #
utf16_croatian_mysql561_ci utf16 122 # #
utf16_unicode_520_ci utf16 123 # #
utf16_vietnamese_ci utf16 124 # #
utf16_croatian_ci utf16 672 # #
utf16le_general_ci utf16le 56 Yes # #
utf16le_bin utf16le 62 # #
......@@ -207,6 +215,8 @@ utf32_hungarian_ci utf32 178 # #
utf32_sinhala_ci utf32 179 # #
utf32_german2_ci utf32 180 # #
utf32_croatian_mysql561_ci utf32 181 # #
utf32_unicode_520_ci utf32 182 # #
utf32_vietnamese_ci utf32 183 # #
utf32_croatian_ci utf32 736 # #
binary binary 63 Yes # #
geostd8_general_ci geostd8 92 Yes # #
......
......@@ -79,6 +79,8 @@ utf8_hungarian_ci utf8 210 # #
utf8_sinhala_ci utf8 211 # #
utf8_german2_ci utf8 212 # #
utf8_croatian_mysql561_ci utf8 213 # #
utf8_unicode_520_ci utf8 214 # #
utf8_vietnamese_ci utf8 215 # #
utf8_general_mysql500_ci utf8 223 # #
utf8_croatian_ci utf8 576 # #
ucs2_general_ci ucs2 35 Yes # #
......@@ -105,6 +107,8 @@ ucs2_hungarian_ci ucs2 146 # #
ucs2_sinhala_ci ucs2 147 # #
ucs2_german2_ci ucs2 148 # #
ucs2_croatian_mysql561_ci ucs2 149 # #
ucs2_unicode_520_ci ucs2 150 # #
ucs2_vietnamese_ci ucs2 151 # #
ucs2_general_mysql500_ci ucs2 159 # #
ucs2_croatian_ci ucs2 640 # #
cp866_general_ci cp866 36 Yes # #
......@@ -145,6 +149,8 @@ utf8mb4_hungarian_ci utf8mb4 242 # #
utf8mb4_sinhala_ci utf8mb4 243 # #
utf8mb4_german2_ci utf8mb4 244 # #
utf8mb4_croatian_mysql561_ci utf8mb4 245 # #
utf8mb4_unicode_520_ci utf8mb4 246 # #
utf8mb4_vietnamese_ci utf8mb4 247 # #
utf8mb4_croatian_ci utf8mb4 608 # #
cp1251_bulgarian_ci cp1251 14 # #
cp1251_ukrainian_ci cp1251 23 # #
......@@ -175,6 +181,8 @@ utf16_hungarian_ci utf16 119 # #
utf16_sinhala_ci utf16 120 # #
utf16_german2_ci utf16 121 # #
utf16_croatian_mysql561_ci utf16 122 # #
utf16_unicode_520_ci utf16 123 # #
utf16_vietnamese_ci utf16 124 # #
utf16_croatian_ci utf16 672 # #
utf16le_general_ci utf16le 56 Yes # #
utf16le_bin utf16le 62 # #
......@@ -207,6 +215,8 @@ utf32_hungarian_ci utf32 178 # #
utf32_sinhala_ci utf32 179 # #
utf32_german2_ci utf32 180 # #
utf32_croatian_mysql561_ci utf32 181 # #
utf32_unicode_520_ci utf32 182 # #
utf32_vietnamese_ci utf32 183 # #
utf32_croatian_ci utf32 736 # #
binary binary 63 Yes # #
geostd8_general_ci geostd8 92 Yes # #
......
......@@ -61,6 +61,28 @@ insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
# make sure utf8_test_ci is Unicode-5.0.0
SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_utf32 0x10002 using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(@a:=convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x10428 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a));
SELECT hex(@a:=convert(_utf32 0x2C00 using utf8mb4) collate utf8mb4_test_ci), hex(lower(@a));
SELECT hex(@a:=convert(_utf32 0x2C30 using utf8mb4) collate utf8mb4_test_ci), hex(upper(@a));
# check that it works with supplementary characters
SELECT hex(weight_string(convert(_utf32 0x61 using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_utf32 0x62 using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_utf32 0x10062 using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_utf32 0x10400 using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_utf32 0x100400 using utf8mb4) collate utf8mb4_test_ci));
# check contractions with non-ascii characters
SELECT hex(weight_string(_utf8mb4 0x64 collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_ucs2 0x0064017e using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_ucs2 0x0044017e using utf8mb4) collate utf8mb4_test_ci));
SELECT hex(weight_string(convert(_ucs2 0x0044017d using utf8mb4) collate utf8mb4_test_ci));
#
# Bug#41084 full-text index added to custom UCA collation not working
#
......
......@@ -67,6 +67,9 @@ SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2;
SELECT * FROM v1;
......@@ -94,6 +97,9 @@ SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_unicode_520_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32;
SELECT * FROM v1;
......@@ -106,5 +112,8 @@ SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_unicode_520_ci;
SELECT * FROM v1;
DROP VIEW v1;
DROP TABLE t1;
......@@ -68,6 +68,8 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_hungarian_c
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_croatian_mysql561_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_croatian_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_german2_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_unicode_520_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_vietnamese_ci;
ALTER TABLE t1 CONVERT TO CHARACTER SET ucs2 COLLATE ucs2_bin;
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_unicode_ci;
......@@ -91,6 +93,8 @@ SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_hungarian_c
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_croatian_mysql561_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_croatian_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_german2_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_unicode_520_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1) FROM t1 GROUP BY c1 COLLATE ucs2_vietnamese_ci;
drop table t1;
......@@ -551,6 +555,11 @@ DROP TABLE t1;
SET collation_connection=utf8_german2_ci;
--source include/ctype_german.inc
--echo #
--echo # WL#2673 Unicode Collation Algorithm new version
--echo #
SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci;
--source include/ctype_unicode520.inc
--echo #
--echo # End of 5.6 tests
......
......@@ -55,6 +55,8 @@ select group_concat(c1 order by binary c1 separator '') from t1 group by c1 coll
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_croatian_mysql561_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_croatian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_german2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_520_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_vietnamese_ci;
drop table t1;
......@@ -178,6 +180,13 @@ SET NAMES utf8;
SET collation_connection=utf16_german2_ci;
--source include/ctype_german.inc
--echo #
--echo # WL#2673 Unicode Collation Algorithm new version
--echo #
SET NAMES utf8mb4;
SET collation_connection=utf16_unicode_520_ci;
--source include/ctype_unicode520.inc
--echo #
--echo # End of 5.6 tests
--echo #
......@@ -56,6 +56,8 @@ select group_concat(c1 order by binary c1 separator '') from t1 group by c1 coll
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_croatian_mysql561_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_croatian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_german2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_520_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_vietnamese_ci;
drop table t1;
......@@ -200,6 +202,13 @@ SET NAMES utf8;
SET collation_connection=utf32_german2_ci;
--source include/ctype_german.inc
--echo #
--echo # WL#2673 Unicode Collation Algorithm new version
--echo #
SET NAMES utf8mb4;
SET collation_connection=utf32_unicode_520_ci;
--source include/ctype_unicode520.inc
--echo #
--echo # End of 5.6 tests
--echo #
......@@ -35,6 +35,8 @@ SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_mysql561_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_german2_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_520_ci;
SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_vietnamese_ci;
DROP TABLE t1;
......
......@@ -45,6 +45,8 @@ extern struct charset_info_st my_charset_ucs2_esperanto_uca_ci;
extern struct charset_info_st my_charset_ucs2_hungarian_uca_ci;
extern struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci;
extern struct charset_info_st my_charset_ucs2_sinhala_uca_ci;
extern struct charset_info_st my_charset_ucs2_unicode_520_ci;
extern struct charset_info_st my_charset_ucs2_vietnamese_ci;
extern struct charset_info_st my_charset_ucs2_croatian_uca_ci;
#endif
......@@ -71,6 +73,8 @@ extern struct charset_info_st my_charset_utf32_esperanto_uca_ci;
extern struct charset_info_st my_charset_utf32_hungarian_uca_ci;
extern struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci;
extern struct charset_info_st my_charset_utf32_sinhala_uca_ci;
extern struct charset_info_st my_charset_utf32_unicode_520_ci;
extern struct charset_info_st my_charset_utf32_vietnamese_ci;
extern struct charset_info_st my_charset_utf32_croatian_uca_ci;
#endif /* HAVE_CHARSET_utf32 */
......@@ -97,6 +101,8 @@ extern struct charset_info_st my_charset_utf16_esperanto_uca_ci;
extern struct charset_info_st my_charset_utf16_hungarian_uca_ci;
extern struct charset_info_st my_charset_utf16_croatian_mysql561_uca_ci;
extern struct charset_info_st my_charset_utf16_sinhala_uca_ci;
extern struct charset_info_st my_charset_utf16_unicode_520_ci;
extern struct charset_info_st my_charset_utf16_vietnamese_ci;
extern struct charset_info_st my_charset_utf16_croatian_uca_ci;
#endif /* HAVE_CHARSET_utf16 */
......@@ -123,6 +129,8 @@ extern struct charset_info_st my_charset_utf8_esperanto_uca_ci;
extern struct charset_info_st my_charset_utf8_hungarian_uca_ci;
extern struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci;
extern struct charset_info_st my_charset_utf8_sinhala_uca_ci;
extern struct charset_info_st my_charset_utf8_unicode_520_ci;
extern struct charset_info_st my_charset_utf8_vietnamese_ci;
extern struct charset_info_st my_charset_utf8_croatian_uca_ci;
#ifdef HAVE_UTF8_GENERAL_CS
extern struct charset_info_st my_charset_utf8_general_cs;
......@@ -151,6 +159,8 @@ extern struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci;
extern struct charset_info_st my_charset_utf8mb4_unicode_520_ci;
extern struct charset_info_st my_charset_utf8mb4_vietnamese_ci;
extern struct charset_info_st my_charset_utf8mb4_croatian_uca_ci;
#endif /* HAVE_CHARSET_utf8mb4 */
......@@ -242,6 +252,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
add_compiled_collation(&my_charset_ucs2_croatian_mysql561_uca_ci);
add_compiled_collation(&my_charset_ucs2_sinhala_uca_ci);
add_compiled_collation(&my_charset_ucs2_unicode_520_ci);
add_compiled_collation(&my_charset_ucs2_vietnamese_ci);
add_compiled_collation(&my_charset_ucs2_croatian_uca_ci);
#endif
#endif
......@@ -281,6 +293,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8_croatian_mysql561_uca_ci);
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
add_compiled_collation(&my_charset_utf8_unicode_520_ci);
add_compiled_collation(&my_charset_utf8_vietnamese_ci);
add_compiled_collation(&my_charset_utf8_croatian_uca_ci);
#endif
#endif /* HAVE_CHARSET_utf8 */
......@@ -312,6 +326,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_croatian_mysql561_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_unicode_520_ci);
add_compiled_collation(&my_charset_utf8mb4_vietnamese_ci);
add_compiled_collation(&my_charset_utf8mb4_croatian_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf8mb4 */
......@@ -345,6 +361,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf16_croatian_mysql561_uca_ci);
add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
add_compiled_collation(&my_charset_utf16_unicode_520_ci);
add_compiled_collation(&my_charset_utf16_vietnamese_ci);
add_compiled_collation(&my_charset_utf16_croatian_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf16 */
......@@ -376,6 +394,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf32_croatian_mysql561_uca_ci);
add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
add_compiled_collation(&my_charset_utf32_unicode_520_ci);
add_compiled_collation(&my_charset_utf32_vietnamese_ci);
add_compiled_collation(&my_charset_utf32_croatian_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf32 */
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment