Commit d1e162e0 authored by Alexander Barkov

Merging utf16le from MySQL-5.6

added:
  mysql-test/include/ctype_heap.inc
  mysql-test/include/ctype_strtoll10.inc
  mysql-test/r/ctype_utf16le.result
  mysql-test/t/ctype_utf16le.test
modified:
  cmake/character_sets.cmake
  include/m_ctype.h
  mysql-test/r/ctype_ucs.result
  mysql-test/r/ctype_utf16.result
  mysql-test/r/ctype_utf32.result
  mysql-test/suite/funcs_1/r/innodb_func_view.result
  mysql-test/suite/funcs_1/r/memory_func_view.result
  mysql-test/suite/funcs_1/r/myisam_func_view.result
  mysql-test/suite/sys_vars/r/character_set_client_basic.result
  mysql-test/suite/sys_vars/r/character_set_connection_basic.result
  mysql-test/suite/sys_vars/r/character_set_database_basic.result
  mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result
  mysql-test/suite/sys_vars/r/character_set_results_basic.result
  mysql-test/t/ctype_ucs.test
  mysql-test/t/ctype_utf16.test
  mysql-test/t/ctype_utf32.test
  mysys/charset-def.c
  sql/item_func.cc
  sql/sys_vars.cc
  strings/ctype-latin1.c
  strings/ctype-ucs2.c
parent 41013f16
......@@ -25,14 +25,14 @@ ENDIF()
SET(CHARSETS ${DEFAULT_CHARSET} latin1 utf8 utf8mb4)
SET(CHARSETS_COMPLEX
big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2
sjis tis620 ucs2 ujis utf8 utf8mb4 utf16 utf32)
sjis tis620 ucs2 ujis utf8 utf8mb4 utf16 utf16le utf32)
SET(CHARSETS_AVAILABLE
binary armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257
cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8
greek hebrew hp8 keybcs2 koi8r koi8u
latin1 latin2 latin5 latin7 macce macroman
sjis swe7 tis620 ucs2 ujis utf8 utf8mb4 utf16 utf32)
sjis swe7 tis620 ucs2 ujis utf8 utf8mb4 utf16 utf16le utf32)
SET (EXTRA_CHARSETS "all")
......
......@@ -362,6 +362,8 @@ extern struct charset_info_st my_charset_ujis_japanese_ci;
extern struct charset_info_st my_charset_utf16_bin;
extern struct charset_info_st my_charset_utf16_general_ci;
extern struct charset_info_st my_charset_utf16_unicode_ci;
extern struct charset_info_st my_charset_utf16le_bin;
extern struct charset_info_st my_charset_utf16le_general_ci;
extern struct charset_info_st my_charset_utf32_bin;
extern struct charset_info_st my_charset_utf32_general_ci;
extern struct charset_info_st my_charset_utf32_unicode_ci;
......
# Shared include: checks that an equality lookup through a HEAP table key
# finds a value stored with a trailing space when the search string has none.
# The character set/collation under test is whatever the connection uses when
# this file is sourced -- presumably set by the including test via SET NAMES;
# confirm against the caller.
# NOTE: statements below are echoed verbatim into recorded .result files, so
# only non-echoed '#' comments may be added here.
--echo #
--echo # Test that cs->coll->hash_sort() ignores trailing spaces
--echo #
# Record which collation this run exercises.
SELECT @@collation_connection;
# LIMIT 0 creates an empty table whose column 'a' is typed from the
# REPEAT() expression.
CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
# NOTE(review): the HEAP key is presumably a hash index, which is what routes
# the lookup below through hash_sort() -- confirm.
ALTER TABLE t1 ADD KEY (a);
# t2 holds the 16 hexadecimal digits; the self cross join below produces
# 16 x 16 = 256 distinct three-character values 'a00' .. 'aFF' as filler rows.
CREATE TABLE t2 (a VARCHAR(10));
INSERT INTO t2 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t2 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
INSERT INTO t1 SELECT CONCAT('a',t21.a,t22.a) FROM t2 t21, t2 t22 ORDER BY 1;
DROP TABLE t2;
# The row under test: 'a' followed by one trailing space.
INSERT INTO t1 VALUES ('a ');
# Search without the trailing space; the recorded results expect the row
# inserted above to be returned (HEX(a) shows the stored space).
SELECT a, HEX(a) FROM t1 WHERE a='a';
DROP TABLE t1;
# Shared include: exercises string-to-integer conversion (CAST ... AS UNSIGNED)
# for string literals in the current connection character set, using inputs of
# every length from 1 to 21 digits.
# NOTE: statements below are echoed verbatim into recorded .result files, so
# only non-echoed '#' comments may be added here.
--echo #
--echo # Testing cs->cset->strtoll10()
--echo #
# Record which character set this run exercises.
SELECT @@character_set_connection;
# Group 1: pure digit strings, 1 to 21 digits long.
# Up to 20 digits the recorded results show the value converted exactly.
SELECT CAST('1' AS UNSIGNED);
SELECT CAST('12' AS UNSIGNED);
SELECT CAST('123' AS UNSIGNED);
SELECT CAST('1234' AS UNSIGNED);
SELECT CAST('12345' AS UNSIGNED);
SELECT CAST('123456' AS UNSIGNED);
SELECT CAST('1234567' AS UNSIGNED);
SELECT CAST('12345678' AS UNSIGNED);
SELECT CAST('123456789' AS UNSIGNED);
SELECT CAST('1234567891' AS UNSIGNED);
SELECT CAST('12345678912' AS UNSIGNED);
SELECT CAST('123456789123' AS UNSIGNED);
SELECT CAST('1234567891234' AS UNSIGNED);
SELECT CAST('12345678912345' AS UNSIGNED);
SELECT CAST('123456789123456' AS UNSIGNED);
SELECT CAST('1234567891234567' AS UNSIGNED);
SELECT CAST('12345678912345678' AS UNSIGNED);
SELECT CAST('123456789123456789' AS UNSIGNED);
SELECT CAST('1234567891234567891' AS UNSIGNED);
SELECT CAST('12345678912345678912' AS UNSIGNED);
# 21 digits: the recorded results expect 18446744073709551615 together with a
# "Truncated incorrect INTEGER value" warning.
SELECT CAST('123456789123456789123' AS UNSIGNED);
# Group 2: the same digit strings followed by a non-digit 'x'.
# The recorded results expect the numeric prefix to be converted and a
# "Truncated incorrect INTEGER value" warning for every statement.
SELECT CAST('1x' AS UNSIGNED);
SELECT CAST('12x' AS UNSIGNED);
SELECT CAST('123x' AS UNSIGNED);
SELECT CAST('1234x' AS UNSIGNED);
SELECT CAST('12345x' AS UNSIGNED);
SELECT CAST('123456x' AS UNSIGNED);
SELECT CAST('1234567x' AS UNSIGNED);
SELECT CAST('12345678x' AS UNSIGNED);
SELECT CAST('123456789x' AS UNSIGNED);
SELECT CAST('1234567891x' AS UNSIGNED);
SELECT CAST('12345678912x' AS UNSIGNED);
SELECT CAST('123456789123x' AS UNSIGNED);
SELECT CAST('1234567891234x' AS UNSIGNED);
SELECT CAST('12345678912345x' AS UNSIGNED);
SELECT CAST('123456789123456x' AS UNSIGNED);
SELECT CAST('1234567891234567x' AS UNSIGNED);
SELECT CAST('12345678912345678x' AS UNSIGNED);
SELECT CAST('123456789123456789x' AS UNSIGNED);
SELECT CAST('1234567891234567891x' AS UNSIGNED);
SELECT CAST('12345678912345678912x' AS UNSIGNED);
SELECT CAST('123456789123456789123x' AS UNSIGNED);
......@@ -1274,7 +1274,7 @@ SELECT CONVERT(QUOTE(CHAR(0xf5 using ucs2)), SIGNED);
CONVERT(QUOTE(CHAR(0xf5 using ucs2)), SIGNED)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: ''
Warning 1292 Truncated incorrect INTEGER value: ''õ''
End of 5.0 tests
#
# Start of 5.1 tests
......@@ -4200,3 +4200,22 @@ DROP TABLE t1;
#
# End of 5.5 tests
#
#
# Start of 5.6 tests
#
#
# Bug#59145 valgrind warnings for uninitialized values in my_strtoll10_mb2
#
SET NAMES latin1;
SELECT CONVERT(CHAR(NULL USING ucs2), UNSIGNED);
CONVERT(CHAR(NULL USING ucs2), UNSIGNED)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: ''
DO IFNULL(CHAR(NULL USING ucs2), '');
DO CAST(CONVERT('' USING ucs2) AS UNSIGNED);
Warnings:
Warning 1292 Truncated incorrect INTEGER value: ''
#
# End of 5.6 tests
#
......@@ -921,6 +921,182 @@ Warning 1292 Truncated incorrect DOUBLE value: '1.2xxx'
select left('aaa','1');
left('aaa','1')
a
#
# Testing cs->cset->strtoll10()
#
SELECT @@character_set_connection;
@@character_set_connection
utf16
SELECT CAST('1' AS UNSIGNED);
CAST('1' AS UNSIGNED)
1
SELECT CAST('12' AS UNSIGNED);
CAST('12' AS UNSIGNED)
12
SELECT CAST('123' AS UNSIGNED);
CAST('123' AS UNSIGNED)
123
SELECT CAST('1234' AS UNSIGNED);
CAST('1234' AS UNSIGNED)
1234
SELECT CAST('12345' AS UNSIGNED);
CAST('12345' AS UNSIGNED)
12345
SELECT CAST('123456' AS UNSIGNED);
CAST('123456' AS UNSIGNED)
123456
SELECT CAST('1234567' AS UNSIGNED);
CAST('1234567' AS UNSIGNED)
1234567
SELECT CAST('12345678' AS UNSIGNED);
CAST('12345678' AS UNSIGNED)
12345678
SELECT CAST('123456789' AS UNSIGNED);
CAST('123456789' AS UNSIGNED)
123456789
SELECT CAST('1234567891' AS UNSIGNED);
CAST('1234567891' AS UNSIGNED)
1234567891
SELECT CAST('12345678912' AS UNSIGNED);
CAST('12345678912' AS UNSIGNED)
12345678912
SELECT CAST('123456789123' AS UNSIGNED);
CAST('123456789123' AS UNSIGNED)
123456789123
SELECT CAST('1234567891234' AS UNSIGNED);
CAST('1234567891234' AS UNSIGNED)
1234567891234
SELECT CAST('12345678912345' AS UNSIGNED);
CAST('12345678912345' AS UNSIGNED)
12345678912345
SELECT CAST('123456789123456' AS UNSIGNED);
CAST('123456789123456' AS UNSIGNED)
123456789123456
SELECT CAST('1234567891234567' AS UNSIGNED);
CAST('1234567891234567' AS UNSIGNED)
1234567891234567
SELECT CAST('12345678912345678' AS UNSIGNED);
CAST('12345678912345678' AS UNSIGNED)
12345678912345678
SELECT CAST('123456789123456789' AS UNSIGNED);
CAST('123456789123456789' AS UNSIGNED)
123456789123456789
SELECT CAST('1234567891234567891' AS UNSIGNED);
CAST('1234567891234567891' AS UNSIGNED)
1234567891234567891
SELECT CAST('12345678912345678912' AS UNSIGNED);
CAST('12345678912345678912' AS UNSIGNED)
12345678912345678912
SELECT CAST('123456789123456789123' AS UNSIGNED);
CAST('123456789123456789123' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123'
SELECT CAST('1x' AS UNSIGNED);
CAST('1x' AS UNSIGNED)
1
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1x'
SELECT CAST('12x' AS UNSIGNED);
CAST('12x' AS UNSIGNED)
12
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12x'
SELECT CAST('123x' AS UNSIGNED);
CAST('123x' AS UNSIGNED)
123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123x'
SELECT CAST('1234x' AS UNSIGNED);
CAST('1234x' AS UNSIGNED)
1234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234x'
SELECT CAST('12345x' AS UNSIGNED);
CAST('12345x' AS UNSIGNED)
12345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345x'
SELECT CAST('123456x' AS UNSIGNED);
CAST('123456x' AS UNSIGNED)
123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456x'
SELECT CAST('1234567x' AS UNSIGNED);
CAST('1234567x' AS UNSIGNED)
1234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567x'
SELECT CAST('12345678x' AS UNSIGNED);
CAST('12345678x' AS UNSIGNED)
12345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678x'
SELECT CAST('123456789x' AS UNSIGNED);
CAST('123456789x' AS UNSIGNED)
123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789x'
SELECT CAST('1234567891x' AS UNSIGNED);
CAST('1234567891x' AS UNSIGNED)
1234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891x'
SELECT CAST('12345678912x' AS UNSIGNED);
CAST('12345678912x' AS UNSIGNED)
12345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912x'
SELECT CAST('123456789123x' AS UNSIGNED);
CAST('123456789123x' AS UNSIGNED)
123456789123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123x'
SELECT CAST('1234567891234x' AS UNSIGNED);
CAST('1234567891234x' AS UNSIGNED)
1234567891234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234x'
SELECT CAST('12345678912345x' AS UNSIGNED);
CAST('12345678912345x' AS UNSIGNED)
12345678912345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345x'
SELECT CAST('123456789123456x' AS UNSIGNED);
CAST('123456789123456x' AS UNSIGNED)
123456789123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456x'
SELECT CAST('1234567891234567x' AS UNSIGNED);
CAST('1234567891234567x' AS UNSIGNED)
1234567891234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567x'
SELECT CAST('12345678912345678x' AS UNSIGNED);
CAST('12345678912345678x' AS UNSIGNED)
12345678912345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678x'
SELECT CAST('123456789123456789x' AS UNSIGNED);
CAST('123456789123456789x' AS UNSIGNED)
123456789123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789x'
SELECT CAST('1234567891234567891x' AS UNSIGNED);
CAST('1234567891234567891x' AS UNSIGNED)
1234567891234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567891x'
SELECT CAST('12345678912345678912x' AS UNSIGNED);
CAST('12345678912345678912x' AS UNSIGNED)
12345678912345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678912x'
SELECT CAST('123456789123456789123x' AS UNSIGNED);
CAST('123456789123456789123x' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123x'
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
......
SET TIME_ZONE='+03:00';
DROP TABLE IF EXISTS t1;
#
# Start of 5.6 tests
#
SET NAMES utf8mb4, collation_connection=utf16le_general_ci;
SELECT HEX('a'), HEX('a ');
HEX('a') HEX('a ')
6100 61002000
select 'a' = 'a', 'a' = 'a ', 'a ' = 'a';
'a' = 'a' 'a' = 'a ' 'a ' = 'a'
1 1 1
select 'a\0' = 'a', 'a\0' < 'a', 'a\0' > 'a';
'a\0' = 'a' 'a\0' < 'a' 'a\0' > 'a'
0 1 0
select 'a' = 'a\0', 'a' < 'a\0', 'a' > 'a\0';
'a' = 'a\0' 'a' < 'a\0' 'a' > 'a\0'
0 0 1
select 'a\0' = 'a ', 'a\0' < 'a ', 'a\0' > 'a ';
'a\0' = 'a ' 'a\0' < 'a ' 'a\0' > 'a '
0 1 0
select 'a ' = 'a\0', 'a ' < 'a\0', 'a ' > 'a\0';
'a ' = 'a\0' 'a ' < 'a\0' 'a ' > 'a\0'
0 0 1
select 'a a' > 'a', 'a \0' < 'a';
'a a' > 'a' 'a \0' < 'a'
1 1
select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a';
binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a'
1 1 1
#
# Check that incomplete utf16le characters in HEX notation
# are left-padded with zeros
#
SELECT HEX(_utf16le 0x44);
HEX(_utf16le 0x44)
0044
SELECT HEX(_utf16le 0x3344);
HEX(_utf16le 0x3344)
3344
SELECT HEX(_utf16le 0x113344);
HEX(_utf16le 0x113344)
00113344
#
# Check that 0x20 is only trimmed when it is
# a part of real SPACE character, not just a part
# of a multibyte sequence.
# Note, CYRILLIC LETTER ER is used as an example, which
# is stored as 0x0420 in utf16le, thus contains 0x20 in the
# low byte. The second character is THREE-PER-M, U+2004,
# which contains 0x20 in the high byte.
#
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (_ucs2 X'2004',_ucs2 X'2004');
SELECT HEX(word) FROM t1 ORDER BY word;
HEX(word)
2004
0420
SELECT HEX(word2) FROM t1 ORDER BY word2;
HEX(word2)
2004
0420
DELETE FROM t1;
#
# Check that real spaces are correctly trimmed.
#
INSERT INTO t1 VALUES (_ucs2 X'042000200020', _ucs2 X'042000200020');
INSERT INTO t1 VALUES (_ucs2 X'200400200020', _ucs2 X'200400200020');
SELECT HEX(word) FROM t1 ORDER BY word;
HEX(word)
200420002000
042020002000
SELECT HEX(word2) FROM t1 ORDER BY word2;
HEX(word2)
2004
0420
DROP TABLE t1;
#
# Check LPAD/RPAD
#
CREATE TABLE t1 (a VARCHAR(10), pad INT, b VARCHAR(10)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'0421');
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'04210422');
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'042104220423');
INSERT INTO t1 VALUES (_ucs2 X'0420042104220423042404250426042704280429042A042B',10,_ucs2 X'042104220423');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES (_utf32 X'010000', 10, _ucs2 X'0421');
INSERT INTO t1 VALUES (_ucs2 X'0421', 10, _utf32 X'010000');
SELECT a, pad, b, LPAD(a, pad, b), HEX(LPAD(a, pad, b)) FROM t1;
a pad b LPAD(a, pad, b) HEX(LPAD(a, pad, b))
Р 10 С СССССССССР 2104210421042104210421042104210421042004
Р 10 СТ СТСТСТСТСР 2104220421042204210422042104220421042004
Р 10 СТУ СТУСТУСТУР 2104220423042104220423042104220423042004
РСТУФХЦЧШЩ 10 СТУ РСТУФХЦЧШЩ 2004210422042304240425042604270428042904
𐀀 10 С ССССССССС𐀀 21042104210421042104210421042104210400D800DC
С 10 𐀀 𐀀𐀀𐀀𐀀𐀀𐀀𐀀𐀀𐀀С 00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC00D800DC2104
DROP TABLE t1;
CREATE TABLE t1 SELECT
LPAD(_utf16le X'2004',10,_utf16le X'2104') l,
RPAD(_utf16le X'2004',10,_utf16le X'2104') r;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`l` varchar(10) CHARACTER SET utf16le NOT NULL DEFAULT '',
`r` varchar(10) CHARACTER SET utf16le NOT NULL DEFAULT ''
) ENGINE=MyISAM DEFAULT CHARSET=latin1
SELECT HEX(l), HEX(r) FROM t1;
HEX(l) HEX(r)
2104210421042104210421042104210421042004 2004210421042104210421042104210421042104
DROP TABLE t1;
CREATE TABLE t1 (f1 CHAR(30));
INSERT INTO t1 VALUES ("103000"), ("22720000"), ("3401200"), ("78000");
SELECT LPAD(f1, 12, "-o-/") FROM t1;
LPAD(f1, 12, "-o-/")
-o-/-o103000
-o-/22720000
-o-/-3401200
-o-/-o-78000
DROP TABLE t1;
#
# Testing LIKE
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
select @@collation_connection;
@@collation_connection
utf16le_general_ci
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16le DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 1 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
a
ФЫВА
ФЫВАПРОЛДЖ
Фыва
Фывапролдж
фЫва
фЫвапролдж
фыВа
фыВапролдж
фывА
фывАпролдж
фыва
фываПролдж
фывапРолдж
фывапрОлдж
фывапроЛдж
фывапролДж
фывапролдЖ
фывапролдж
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
a
ФЫВА
ФЫВАПРОЛДЖ
Фыва
Фывапролдж
фЫва
фЫвапролдж
фыВа
фыВапролдж
фывА
фывАпролдж
фыва
фываПролдж
фывапРолдж
фывапрОлдж
фывапроЛдж
фывапролДж
фывапролдЖ
фывапролдж
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
a
ФЫВА
ФЫВАПРОЛДЖ
Фыва
Фывапролдж
фЫва
фЫвапролдж
фыВа
фыВапролдж
фывА
фывАпролдж
фыва
фываПролдж
фывапРолдж
фывапрОлдж
фывапроЛдж
фывапролДж
фывапролдЖ
фывапролдж
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16le_bin ORDER BY BINARY a;
a
фЫва
фЫвапролдж
DROP TABLE t1;
CREATE TABLE t1 (word VARCHAR(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf16le;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
word
cat
SELECT * FROM t1 WHERE word LIKE "ca_";
word
cat
SELECT * FROM t1 WHERE word LIKE "cat";
word
cat
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
word
cat
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
word
cat
DROP TABLE t1;
#
# Check that INSERT() works fine.
# This invokes charpos() function.
#
CREATE TABLE t1 (
a VARCHAR(10) CHARACTER SET utf16le,
b VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ('abc', 'def');
SELECT INSERT(a, 10, 2, b) FROM t1;
INSERT(a, 10, 2, b)
abc
SELECT INSERT(a, 1, 2, b) FROM t1;
INSERT(a, 1, 2, b)
defc
DROP TABLE t1;
#
# Bug#1264
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
#
# Two fields, index
#
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) DEFAULT 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort
SELECT * FROM t1 ORDER BY word;
word bar
a 0
aar 0
aardvar 0
aardvara 0
aardvark 0
aardvarz 0
EXPLAIN SELECT word FROM t1 ORDER BY word;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index
SELECT word FROM t1 ORDER by word;
word
a
aar
aardvar
aardvara
aardvark
aardvarz
DROP TABLE t1;
#
# One field, index
#
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 258 NULL 6 Using index
SELECT * FROM t1 ORDER BY word;
word
a
aar
aardvar
aardvara
aardvark
aardvarz
DROP TABLE t1;
#
# Two fields, no index
#
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort
SELECT * FROM t1 ORDER BY word;
word bar
a 2
aar 1
aardvar 3
aardvara 5
aardvark 4
aardvarz 6
EXPLAIN SELECT word FROM t1 ORDER BY word;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using filesort
SELECT word FROM t1 ORDER BY word;
word
a
aar
aardvar
aardvara
aardvark
aardvarz
DROP TABLE t1;
#
# END OF Bug 1264 test
#
#
# Check alignment for from-binary-conversion with CAST and CONVERT
#
SELECT HEX(CAST(0xAA as char CHARACTER SET utf16le));
HEX(CAST(0xAA as char CHARACTER SET utf16le))
00AA
SELECT HEX(CONVERT(0xAA USING utf16le));
HEX(CONVERT(0xAA USING utf16le))
00AA
#
# Check alignment for string types
#
CREATE TABLE t1 (a CHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
HEX(a)
0001
0011
0111
1111
00011111
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
HEX(a)
0001
0011
0111
1111
00011111
DROP TABLE t1;
CREATE TABLE t1 (a TEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
HEX(a)
0001
0011
0111
1111
00011111
DROP TABLE t1;
CREATE TABLE t1 (a MEDIUMTEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
HEX(a)
0001
0011
0111
1111
00011111
DROP TABLE t1;
CREATE TABLE t1 (a LONGTEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
HEX(a)
0001
0011
0111
1111
00011111
DROP TABLE t1;
#
# Bug#5081 : UCS2 fields are filled with '0x2020'
# after extending field length
#
CREATE TABLE t1(a CHAR(1)) DEFAULT CHARSET utf16le;
INSERT INTO t1 VALUES ('a'),('b'),('c');
ALTER TABLE t1 MODIFY a CHAR(5);
SELECT a, HEX(a) FROM t1;
a HEX(a)
a 6100
b 6200
c 6300
DROP TABLE t1;
#
# Check prepare statement from an UTF16 string
#
SET NAMES latin1;
SET @ivar= 1234;
SET @str1 = 'SELECT ?';
SET @str2 = CONVERT(@str1 USING utf16le);
PREPARE stmt1 FROM @str2;
EXECUTE stmt1 USING @ivar;
?
1234
#
# Check that utf16le works with ENUM and SET type
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
CREATE TABLE t1 (a ENUM('x','y','z') CHARACTER SET utf16le);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` enum('x','y','z') CHARACTER SET utf16le DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('x');
INSERT INTO t1 VALUES ('y');
INSERT INTO t1 VALUES ('z');
SELECT a, HEX(a) FROM t1 ORDER BY a;
a HEX(a)
x 7800
y 7900
z 7A00
ALTER TABLE t1 CHANGE a a ENUM('x','y','z','d','e','ä','ö','ü') CHARACTER SET utf16le;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` enum('x','y','z','d','e','ä','ö','ü') CHARACTER SET utf16le DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('D');
INSERT INTO t1 VALUES ('E ');
INSERT INTO t1 VALUES ('ä');
INSERT INTO t1 VALUES ('ö');
INSERT INTO t1 VALUES ('ü');
SELECT a, HEX(a) FROM t1 ORDER BY a;
a HEX(a)
x 7800
y 7900
z 7A00
d 6400
e 6500
ä E400
ö F600
ü FC00
DROP TABLE t1;
CREATE TABLE t1 (a set ('x','y','z','ä','ö','ü') CHARACTER SET utf16le);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` set('x','y','z','ä','ö','ü') CHARACTER SET utf16le DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('x');
INSERT INTO t1 VALUES ('y');
INSERT INTO t1 VALUES ('z');
INSERT INTO t1 VALUES ('x,y');
INSERT INTO t1 VALUES ('x,y,z,ä,ö,ü');
SELECT a, HEX(a) FROM t1 ORDER BY a;
a HEX(a)
x 7800
y 7900
x,y 78002C007900
z 7A00
x,y,z,ä,ö,ü 78002C0079002C007A002C00E4002C00F6002C00FC00
DROP TABLE t1;
#
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
#
CREATE TABLE t1(a ENUM('a','b','c')) DEFAULT CHARACTER SET utf16le;
INSERT INTO t1 VALUES('a'),('b'),('c');
ALTER TABLE t1 ADD b CHAR(1);
SHOW WARNINGS;
Level Code Message
SELECT * FROM t1 ORDER BY a;
a b
a NULL
b NULL
c NULL
DROP TABLE t1;
SET NAMES utf8, collation_connection='utf16le_general_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
utf16le_general_ci 61000900
utf16le_general_ci 6100
utf16le_general_ci 61002000
drop table t1;
select @@collation_connection;
@@collation_connection
utf16le_general_ci
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
SET NAMES utf8, collation_connection='utf16le_bin';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
utf16le_bin 61000900
utf16le_bin 6100
utf16le_bin 61002000
drop table t1;
#
# Bug#55980 Character sets: supplementary character _bin ordering is wrong
#
CREATE TABLE t1 AS SELECT REPEAT('a',1) AS a LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(1) CHARACTER SET utf16le COLLATE utf16le_bin NOT NULL DEFAULT ''
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES (_utf8mb4 0xEFBE9D),(_utf8mb4 0xF0908E84);
INSERT INTO t1 VALUES (_utf8mb4 0xCE85),(_utf8mb4 0xF4808080);
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
HEX(a) HEX(CONVERT(a USING utf8mb4))
8503 CE85
9DFF EFBE9D
00D884DF F0908E84
C0DB00DC F4808080
ALTER TABLE t1 ADD KEY(a);
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
HEX(a) HEX(CONVERT(a USING utf8mb4))
8503 CE85
9DFF EFBE9D
00D884DF F0908E84
C0DB00DC F4808080
DROP TABLE IF EXISTS t1;
select @@collation_connection;
@@collation_connection
utf16le_bin
create table t1 ROW_FORMAT=DYNAMIC select repeat('a',50) as c1 ;
insert into t1 values('abcdef');
insert into t1 values('_bcdef');
insert into t1 values('a_cdef');
insert into t1 values('ab_def');
insert into t1 values('abc_ef');
insert into t1 values('abcd_f');
insert into t1 values('abcde_');
select c1 as c1u from t1 where c1 like 'ab\_def';
c1u
ab_def
select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h
ab_def
drop table t1;
#
# Bug#10344 Some string functions fail for UCS2
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le, pos INT);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,1);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,2);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,3);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-1);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-2);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-3);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 1);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 2);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 3);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -1);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -2);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -3);
SELECT HEX(SUBSTR(a, pos)), SUBSTR(a, pos) FROM t1;
HEX(SUBSTR(a, pos)) SUBSTR(a, pos)
E400E5006800 äåh
E5006800 åh
6800 h
6800 h
E5006800 åh
E400E5006800 äåh
E400E50000D800DC äå?
E50000D800DC å?
00D800DC ?
00D800DC ?
E50000D800DC å?
E400E50000D800DC äå?
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf16le_general_ci;
#
# Bug#9442 Set parameter make query fail if column CHARACTER SET is UCS2
#
CREATE TABLE t1 (utext VARCHAR(20) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ("lily");
INSERT INTO t1 VALUES ("river");
PREPARE stmt FROM 'SELECT utext FROM t1 where utext like ?';
SET @param1='%%';
EXECUTE stmt USING @param1;
utext
lily
river
EXECUTE stmt USING @param1;
utext
lily
river
SELECT utext FROM t1 where utext like '%%';
utext
lily
river
DROP TABLE t1;
DEALLOCATE PREPARE stmt;
#
# Bug#22052 Trailing spaces are not removed FROM UNICODE fields in an index
#
CREATE TABLE t1 (
a CHAR(10) CHARACTER SET utf16le NOT NULL,
INDEX a (a)
) engine=myisam;
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x201f, 10));
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x2020, 10));
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x2021, 10));
# make sure "index read" is used
explain SELECT HEX(a) FROM t1 ORDER BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 40 NULL 3 Using index
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
1F201F201F201F201F201F201F201F201F201F20
2020202020202020202020202020202020202020
2120212021202120212021202120212021202120
ALTER TABLE t1 DROP INDEX a;
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
1F201F201F201F201F201F201F201F201F201F20
2020202020202020202020202020202020202020
2120212021202120212021202120212021202120
DROP TABLE t1;
#
# Bug #20108: corrupted default enum value for a ucs2 field
#
CREATE TABLE t1 (
status ENUM('active','passive') CHARACTER SET utf16le COLLATE utf16le_general_ci
NOT NULL DEFAULT 'passive'
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`status` enum('active','passive') CHARACTER SET utf16le NOT NULL DEFAULT 'passive'
) ENGINE=MyISAM DEFAULT CHARSET=latin1
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`status` enum('active','passive') CHARACTER SET utf16le NOT NULL DEFAULT 'passive',
`a` int(11) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
#
# Conversion FROM an UTF16LE string to a decimal column
#
CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET utf16le, b DECIMAL(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
UPDATE t1 set b=a;
SELECT *, HEX(a) FROM t1;
a b HEX(a)
1.1 1.100 31002E003100
2.1 2.100 32002E003100
DROP TABLE t1;
#
# Bug#9442 Set parameter make query fail if column CHARACTER SET is UCS2
#
CREATE TABLE t1 (utext VARCHAR(20) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ("lily");
INSERT INTO t1 VALUES ("river");
PREPARE stmt FROM 'SELECT utext FROM t1 where utext like ?';
SET @param1='%%';
EXECUTE stmt USING @param1;
utext
lily
river
EXECUTE stmt USING @param1;
utext
lily
river
SELECT utext FROM t1 where utext like '%%';
utext
lily
river
DROP TABLE t1;
DEALLOCATE PREPARE stmt;
#
# Bug#22638 SOUNDEX broken for international characters
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
SELECT SOUNDEX(''),SOUNDEX('he'),SOUNDEX('hello all folks'),SOUNDEX('#3556 in bugdb');
SOUNDEX('') SOUNDEX('he') SOUNDEX('hello all folks') SOUNDEX('#3556 in bugdb')
H000 H4142 I51231
SELECT HEX(SOUNDEX('')),HEX(SOUNDEX('he')),HEX(SOUNDEX('hello all folks')),HEX(SOUNDEX('#3556 in bugdb'));
HEX(SOUNDEX('')) HEX(SOUNDEX('he')) HEX(SOUNDEX('hello all folks')) HEX(SOUNDEX('#3556 in bugdb'))
4800300030003000 48003400310034003200 490035003100320033003100
SELECT 'mood' sounds like 'mud';
'mood' sounds like 'mud'
1
# Cyrillic A, BE, VE
SELECT HEX(SOUNDEX(_utf16le 0x041004110412));
HEX(SOUNDEX(_utf16le 0x041004110412))
0410300030003000
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
SELECT HEX(SOUNDEX(_utf16le 0x00BF00C0));
HEX(SOUNDEX(_utf16le 0x00BF00C0))
00BF300030003000
#
# Bug#14290: character_maximum_length for text fields
#
CREATE TABLE t1(a BLOB, b TEXT CHARSET utf16le);
SELECT data_type, character_octet_length, character_maximum_length
FROM information_schema.columns where table_name='t1';
data_type character_octet_length character_maximum_length
blob 65535 65535
text 65535 32767
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf16le_general_ci;
#
# Testing cs->coll->instr()
#
SELECT POSITION('bb' IN 'abba');
POSITION('bb' IN 'abba')
2
#
# Testing cs->coll->hash_sort()
#
SET NAMES utf8, collation_connection=utf16le_bin;
#
# Test that cs->coll->hash_sort() ignores trailing spaces
#
SELECT @@collation_connection;
@@collation_connection
utf16le_bin
CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
ALTER TABLE t1 ADD KEY (a);
CREATE TABLE t2 (a VARCHAR(10));
INSERT INTO t2 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t2 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
INSERT INTO t1 SELECT CONCAT('a',t21.a,t22.a) FROM t2 t21, t2 t22 ORDER BY 1;
DROP TABLE t2;
INSERT INTO t1 VALUES ('a ');
SELECT a, HEX(a) FROM t1 WHERE a='a';
a HEX(a)
a 61002000
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf16le_general_ci;
#
# Test that cs->coll->hash_sort() ignores trailing spaces
#
SELECT @@collation_connection;
@@collation_connection
utf16le_general_ci
CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT (' ', 10) AS a LIMIT 0;
ALTER TABLE t1 ADD KEY (a);
CREATE TABLE t2 (a VARCHAR(10));
INSERT INTO t2 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
INSERT INTO t2 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
INSERT INTO t1 SELECT CONCAT('a',t21.a,t22.a) FROM t2 t21, t2 t22 ORDER BY 1;
DROP TABLE t2;
INSERT INTO t1 VALUES ('a ');
SELECT a, HEX(a) FROM t1 WHERE a='a';
a HEX(a)
a 61002000
DROP TABLE t1;
#
# Testing cs->cset->numchars()
#
SELECT CHAR_LENGTH('abcd'), OCTET_LENGTH('abcd');
CHAR_LENGTH('abcd') OCTET_LENGTH('abcd')
4 8
SELECT CHAR_LENGTH(_utf16le 0x00D800DC), OCTET_LENGTH(_utf16le 0x00D800DC);
CHAR_LENGTH(_utf16le 0x00D800DC) OCTET_LENGTH(_utf16le 0x00D800DC)
1 4
SELECT CHAR_LENGTH(_utf16le 0x7DD8FFDF), OCTET_LENGTH(_utf16le 0x7FD8DDDF);
CHAR_LENGTH(_utf16le 0x7DD8FFDF) OCTET_LENGTH(_utf16le 0x7FD8DDDF)
1 4
#
# Testing cs->cset->charpos()
#
SELECT LEFT('abcd',2);
LEFT('abcd',2)
ab
SELECT HEX(LEFT(_utf16le 0x00D800DC7FD8FFDF, 1));
HEX(LEFT(_utf16le 0x00D800DC7FD8FFDF, 1))
00D800DC
SELECT HEX(RIGHT(_utf16le 0x00D800DC7FD8FFDF, 1));
HEX(RIGHT(_utf16le 0x00D800DC7FD8FFDF, 1))
7FD8FFDF
#
# Testing cs->cset->well_formed_length()
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
# Bad sequences
INSERT INTO t1 VALUES (_utf16le 0x00D8);
ERROR HY000: Invalid utf16le character string: '00D8'
INSERT INTO t1 VALUES (_utf16le 0x00DC);
ERROR HY000: Invalid utf16le character string: '00DC'
INSERT INTO t1 VALUES (_utf16le 0x00D800D8);
ERROR HY000: Invalid utf16le character string: '00D800'
INSERT INTO t1 VALUES (_utf16le 0x00D800E8);
ERROR HY000: Invalid utf16le character string: '00D800'
INSERT INTO t1 VALUES (_utf16le 0x00D80008);
ERROR HY000: Invalid utf16le character string: '00D800'
# Good sequences
INSERT INTO t1 VALUES (_utf16le 0x00D800DC);
INSERT INTO t1 VALUES (_utf16le 0x00D8FFDC);
INSERT INTO t1 VALUES (_utf16le 0xFFDB00DC);
INSERT INTO t1 VALUES (_utf16le 0xFFDBFFDC);
SELECT HEX(a) FROM t1;
HEX(a)
00D800DC
00D8FFDC
FFDB00DC
FFDBFFDC
DROP TABLE t1;
#
# Bug#32393 Character sets: illegal characters in utf16le columns
#
# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
#
# via ALTER
#
CREATE TABLE t1 (s1 VARCHAR(50) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (0xDF84);
ALTER TABLE t1 MODIFY column s1 VARCHAR(50) CHARACTER SET utf16le;
Warnings:
Warning 1366 Incorrect string value: '\xDF\x84' for column 's1' at row 1
SELECT HEX(s1) FROM t1;
HEX(s1)
3F00
DROP TABLE t1;
#
# via UPDATE
#
CREATE TABLE t1 (s1 VARCHAR(5) CHARACTER SET ucs2, s2 VARCHAR(5) CHARACTER SET utf16le);
INSERT INTO t1 (s1) VALUES (0xdf84);
UPDATE t1 set s2 = s1;
Warnings:
Warning 1366 Incorrect string value: '\xDF\x84' for column 's2' at row 1
SELECT HEX(s2) FROM t1;
HEX(s2)
3F00
DROP TABLE t1;
#
# Testing cs->cset->lengthsp()
#
CREATE TABLE t1 (a CHAR(10)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES ('a ');
SELECT HEX(a) FROM t1;
HEX(a)
6100
DROP TABLE t1;
#
# Testing cs->cset->caseup() and cs->cset->casedn()
#
SELECT UPPER('abcd'), LOWER('ABCD');
UPPER('abcd') LOWER('ABCD')
ABCD abcd
#
# Checking str_to_datetime()
#
select @@collation_connection;
@@collation_connection
utf16le_general_ci
CREATE TABLE t1 (a date);
INSERT INTO t1 VALUES ('2007-09-16');
SELECT * FROM t1;
a
2007-09-16
DROP TABLE t1;
#
# Testing cs->cset->ll10tostr
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (123456);
SELECT a, HEX(a) FROM t1;
a HEX(a)
123456 310032003300340035003600
DROP TABLE t1;
#
# Testing cs->cset->fill
# SOUNDEX fills strings with DIGIT ZERO up to four characters
#
SELECT SOUNDEX('a'), HEX(SOUNDEX('a'));
SOUNDEX('a') HEX(SOUNDEX('a'))
A000 4100300030003000
#
# Testing cs->cset->strntoul
#
CREATE TABLE t1 (a enum ('a','b','c')) CHARACTER SET utf16le;
INSERT INTO t1 VALUES ('1');
SELECT * FROM t1;
a
a
DROP TABLE t1;
#
# Testing cs->cset->strntoll and cs->cset->strntoull
#
SET NAMES latin1;
SELECT HEX(CONV(CONVERT('123' USING utf16le), -10, 16));
HEX(CONV(CONVERT('123' USING utf16le), -10, 16))
3742
SELECT HEX(CONV(CONVERT('123' USING utf16le), 10, 16));
HEX(CONV(CONVERT('123' USING utf16le), 10, 16))
3742
SET NAMES utf8, collation_connection=utf16le_general_ci;
#
# Testing cs->cset->strntod
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
SELECT 1.1 + '1.2';
1.1 + '1.2'
2.3
SELECT 1.1 + '1.2xxx';
1.1 + '1.2xxx'
2.3
Warnings:
Warning 1292 Truncated incorrect DOUBLE value: '1.2xxx'
#
# Testing cs->cset->strtoll10
#
SELECT LEFT('aaa','1');
LEFT('aaa','1')
a
CREATE TABLE t1 AS SELECT REPEAT('abcd', 128) AS a;
SELECT LEFT(a, '2') FROM t1;
LEFT(a, '2')
ab
SELECT LEFT(a, ' \t \t 2') FROM t1;
LEFT(a, ' \t \t 2')
ab
SELECT LEFT(a, ' \t \t +2') FROM t1;
LEFT(a, ' \t \t +2')
ab
SELECT SUBSTR(a, '-2') FROM t1;
SUBSTR(a, '-2')
cd
SELECT SUBSTR(a, ' \t \t -2') FROM t1;
SUBSTR(a, ' \t \t -2')
cd
SELECT LEFT(a, '00002') FROM t1;
LEFT(a, '00002')
ab
SELECT LEFT(a, ' \t \t 00002') FROM t1;
LEFT(a, ' \t \t 00002')
ab
SELECT LEFT(a, ' \t \t +00002') FROM t1;
LEFT(a, ' \t \t +00002')
ab
SELECT SUBSTR(a, '-00002') FROM t1;
SUBSTR(a, '-00002')
cd
SELECT SUBSTR(a, ' \t \t -00002') FROM t1;
SUBSTR(a, ' \t \t -00002')
cd
DROP TABLE t1;
CREATE TABLE t1 AS SELECT REPEAT('abcd', 128) AS a LIMIT 0;
INSERT INTO t1 VALUES ('255'), ('65535'),('16777215'),('4294967295'),
('1099511627775'),('281474976710655'),('72057594037927935'),
('1844674407370955161'),('18446744073709551614'), ('18446744073709551615');
SELECT a, CAST(a AS SIGNED), CAST(a AS UNSIGNED) FROM t1;
a CAST(a AS SIGNED) CAST(a AS UNSIGNED)
255 255 255
65535 65535 65535
16777215 16777215 16777215
4294967295 4294967295 4294967295
1099511627775 1099511627775 1099511627775
281474976710655 281474976710655 281474976710655
72057594037927935 72057594037927935 72057594037927935
1844674407370955161 1844674407370955161 1844674407370955161
18446744073709551614 -2 18446744073709551614
18446744073709551615 -1 18446744073709551615
Warnings:
Note 1105 Cast to signed converted positive out-of-range integer to it's negative complement
Note 1105 Cast to signed converted positive out-of-range integer to it's negative complement
UPDATE t1 SET a=CONCAT('-', a);
SELECT a, CAST(a AS SIGNED) FROM t1;
a CAST(a AS SIGNED)
-255 -255
-65535 -65535
-16777215 -16777215
-4294967295 -4294967295
-1099511627775 -1099511627775
-281474976710655 -281474976710655
-72057594037927935 -72057594037927935
-1844674407370955161 -1844674407370955161
-18446744073709551614 -9223372036854775808
-18446744073709551615 -9223372036854775808
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '-18446744073709551614'
Warning 1292 Truncated incorrect INTEGER value: '-18446744073709551615'
DROP TABLE t1;
#
# Testing cs->cset->strtoll10()
#
SELECT @@character_set_connection;
@@character_set_connection
utf16le
SELECT CAST('1' AS UNSIGNED);
CAST('1' AS UNSIGNED)
1
SELECT CAST('12' AS UNSIGNED);
CAST('12' AS UNSIGNED)
12
SELECT CAST('123' AS UNSIGNED);
CAST('123' AS UNSIGNED)
123
SELECT CAST('1234' AS UNSIGNED);
CAST('1234' AS UNSIGNED)
1234
SELECT CAST('12345' AS UNSIGNED);
CAST('12345' AS UNSIGNED)
12345
SELECT CAST('123456' AS UNSIGNED);
CAST('123456' AS UNSIGNED)
123456
SELECT CAST('1234567' AS UNSIGNED);
CAST('1234567' AS UNSIGNED)
1234567
SELECT CAST('12345678' AS UNSIGNED);
CAST('12345678' AS UNSIGNED)
12345678
SELECT CAST('123456789' AS UNSIGNED);
CAST('123456789' AS UNSIGNED)
123456789
SELECT CAST('1234567891' AS UNSIGNED);
CAST('1234567891' AS UNSIGNED)
1234567891
SELECT CAST('12345678912' AS UNSIGNED);
CAST('12345678912' AS UNSIGNED)
12345678912
SELECT CAST('123456789123' AS UNSIGNED);
CAST('123456789123' AS UNSIGNED)
123456789123
SELECT CAST('1234567891234' AS UNSIGNED);
CAST('1234567891234' AS UNSIGNED)
1234567891234
SELECT CAST('12345678912345' AS UNSIGNED);
CAST('12345678912345' AS UNSIGNED)
12345678912345
SELECT CAST('123456789123456' AS UNSIGNED);
CAST('123456789123456' AS UNSIGNED)
123456789123456
SELECT CAST('1234567891234567' AS UNSIGNED);
CAST('1234567891234567' AS UNSIGNED)
1234567891234567
SELECT CAST('12345678912345678' AS UNSIGNED);
CAST('12345678912345678' AS UNSIGNED)
12345678912345678
SELECT CAST('123456789123456789' AS UNSIGNED);
CAST('123456789123456789' AS UNSIGNED)
123456789123456789
SELECT CAST('1234567891234567891' AS UNSIGNED);
CAST('1234567891234567891' AS UNSIGNED)
1234567891234567891
SELECT CAST('12345678912345678912' AS UNSIGNED);
CAST('12345678912345678912' AS UNSIGNED)
12345678912345678912
SELECT CAST('123456789123456789123' AS UNSIGNED);
CAST('123456789123456789123' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123'
SELECT CAST('1x' AS UNSIGNED);
CAST('1x' AS UNSIGNED)
1
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1x'
SELECT CAST('12x' AS UNSIGNED);
CAST('12x' AS UNSIGNED)
12
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12x'
SELECT CAST('123x' AS UNSIGNED);
CAST('123x' AS UNSIGNED)
123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123x'
SELECT CAST('1234x' AS UNSIGNED);
CAST('1234x' AS UNSIGNED)
1234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234x'
SELECT CAST('12345x' AS UNSIGNED);
CAST('12345x' AS UNSIGNED)
12345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345x'
SELECT CAST('123456x' AS UNSIGNED);
CAST('123456x' AS UNSIGNED)
123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456x'
SELECT CAST('1234567x' AS UNSIGNED);
CAST('1234567x' AS UNSIGNED)
1234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567x'
SELECT CAST('12345678x' AS UNSIGNED);
CAST('12345678x' AS UNSIGNED)
12345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678x'
SELECT CAST('123456789x' AS UNSIGNED);
CAST('123456789x' AS UNSIGNED)
123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789x'
SELECT CAST('1234567891x' AS UNSIGNED);
CAST('1234567891x' AS UNSIGNED)
1234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891x'
SELECT CAST('12345678912x' AS UNSIGNED);
CAST('12345678912x' AS UNSIGNED)
12345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912x'
SELECT CAST('123456789123x' AS UNSIGNED);
CAST('123456789123x' AS UNSIGNED)
123456789123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123x'
SELECT CAST('1234567891234x' AS UNSIGNED);
CAST('1234567891234x' AS UNSIGNED)
1234567891234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234x'
SELECT CAST('12345678912345x' AS UNSIGNED);
CAST('12345678912345x' AS UNSIGNED)
12345678912345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345x'
SELECT CAST('123456789123456x' AS UNSIGNED);
CAST('123456789123456x' AS UNSIGNED)
123456789123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456x'
SELECT CAST('1234567891234567x' AS UNSIGNED);
CAST('1234567891234567x' AS UNSIGNED)
1234567891234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567x'
SELECT CAST('12345678912345678x' AS UNSIGNED);
CAST('12345678912345678x' AS UNSIGNED)
12345678912345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678x'
SELECT CAST('123456789123456789x' AS UNSIGNED);
CAST('123456789123456789x' AS UNSIGNED)
123456789123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789x'
SELECT CAST('1234567891234567891x' AS UNSIGNED);
CAST('1234567891234567891x' AS UNSIGNED)
1234567891234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567891x'
SELECT CAST('12345678912345678912x' AS UNSIGNED);
CAST('12345678912345678912x' AS UNSIGNED)
12345678912345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678912x'
SELECT CAST('123456789123456789123x' AS UNSIGNED);
CAST('123456789123456789123x' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123x'
#
# Testing cs->cset->strntoull10rnd
#
CREATE TABLE t1 (a int);
INSERT INTO t1 VALUES ('-1234.1e2');
INSERT INTO t1 VALUES ('-1234.1e2xxxx');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES ('-1234.1e2 ');
INSERT INTO t1 VALUES ('123');
INSERT INTO t1 VALUES ('-124');
INSERT INTO t1 VALUES ('+125');
INSERT INTO t1 VALUES (' \t \t 123');
INSERT INTO t1 VALUES (' \t \t -124');
INSERT INTO t1 VALUES (' \t \t +125');
INSERT INTO t1 VALUES (' \t \t 000123');
INSERT INTO t1 VALUES (' \t \t -000124');
INSERT INTO t1 VALUES (' \t \t +000125');
SELECT * FROM t1;
a
-123410
-123410
-123410
123
-124
125
123
-124
125
123
-124
125
DROP TABLE t1;
CREATE TABLE t1 (a BIGINT UNSIGNED);
INSERT INTO t1 VALUES ('255'), ('65535'),('16777215'),('4294967295'),
('1099511627775'),('281474976710655'),('72057594037927935'),
('1844674407370955161'),('18446744073709551614'), ('18446744073709551615');
SELECT * FROM t1;
a
255
65535
16777215
4294967295
1099511627775
281474976710655
72057594037927935
1844674407370955161
18446744073709551614
18446744073709551615
DROP TABLE t1;
#
# Testing cs->cset->scan
#
CREATE TABLE t1 (a int);
INSERT INTO t1 VALUES ('1 ');
INSERT INTO t1 VALUES ('1 x');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
SELECT * FROM t1;
a
1
1
DROP TABLE t1;
#
# Testing auto-conversion to TEXT
#
CREATE TABLE t1 (a VARCHAR(17000) CHARACTER SET utf16le);
Warnings:
Note 1246 Converting column 'a' from VARCHAR to TEXT
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` mediumtext CHARACTER SET utf16le
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
#
# Testing that maximim possible key length is 1000 bytes
#
CREATE TABLE t1 (a VARCHAR(250) CHARACTER SET utf16le PRIMARY KEY);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(250) CHARACTER SET utf16le NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(334) CHARACTER SET utf16le PRIMARY KEY);
ERROR 42000: Specified key was too long; max key length is 1000 bytes
#
# Conversion to utf8
#
CREATE TABLE t1 (a CHAR(1) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x00D800DC),(0x00D8FFDC),(0x7FDB00DC),(0x7FDBFFDC);
INSERT INTO t1 VALUES (0xC000), (0xFF00),(0x00E0), (0xFFFF);
SELECT HEX(a), HEX(@a:=CONVERT(a USING utf8mb4)), HEX(CONVERT(@a USING utf16le)) FROM t1;
HEX(a) HEX(@a:=CONVERT(a USING utf8mb4)) HEX(CONVERT(@a USING utf16le))
00D800DC F0908080 00D800DC
00D8FFDC F09083BF 00D8FFDC
7FDB00DC F3AFB080 7FDB00DC
7FDBFFDC F3AFB3BF 7FDBFFDC
C000 C380 C000
FF00 C3BF FF00
00E0 EE8080 00E0
FFFF EFBFBF FFFF
DROP TABLE t1;
#
# Test basic regex functionality
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
drop table if exists t1;
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(64) CHARACTER SET utf16le DEFAULT NULL,
`s2` varchar(64) CHARACTER SET utf16le DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
s1 regexp s2
0
0
0
1
1
1
1
1
1
1
NULL
NULL
NULL
NULL
drop table t1;
#
# Test how CHARACTER SET works with date/time
#
CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
SET timestamp=1216359724;
INSERT INTO t1 VALUES (current_date);
INSERT INTO t1 VALUES (current_time);
INSERT INTO t1 VALUES (current_timestamp);
SELECT s1, hex(s1) FROM t1;
s1 hex(s1)
2008-07-18 32003000300038002D00300037002D0031003800
08:42:04 300038003A00340032003A0030003400
2008-07-18 08:42:04 32003000300038002D00300037002D00310038002000300038003A00340032003A0030003400
DROP TABLE t1;
SET timestamp=0;
#
# Bug#33073 Character sets: ordering fails with utf32
#
SET NAMES utf8, collation_connection=utf16le_general_ci;
CREATE TABLE t1 AS SELECT REPEAT('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(2) CHARACTER SET utf16le NOT NULL DEFAULT ''
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
SELECT * FROM t1 ORDER BY s1;
s1
ab
ab
AE
AE
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
s1
ab
ab
AE
AE
DROP TABLE t1;
SET max_sort_length=DEFAULT;
#
# Bug#52520 Difference in tinytext utf column metadata
#
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf16le,
s2 TEXT CHARACTER SET utf16le,
s3 MEDIUMTEXT CHARACTER SET utf16le,
s4 LONGTEXT CHARACTER SET utf16le
);
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 255 0 Y 16 0 56
def test t1 t1 s2 s2 252 65535 0 Y 16 0 56
def test t1 t1 s3 s3 252 16777215 0 Y 16 0 56
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 56
def HEX(s1) 253 6120 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 127 0 Y 16 0 8
def test t1 t1 s2 s2 252 32767 0 Y 16 0 8
def test t1 t1 s3 s3 252 8388607 0 Y 16 0 8
def test t1 t1 s4 s4 252 2147483647 0 Y 16 0 8
def HEX(s1) 253 2040 0 Y 0 0 8
s1 s2 s3 s4 HEX(s1)
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 s1 s1 252 381 0 Y 16 0 33
def test t1 t1 s2 s2 252 98301 0 Y 16 0 33
def test t1 t1 s3 s3 252 25165821 0 Y 16 0 33
def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33
def HEX(s1) 253 6120 0 Y 0 0 33
s1 s2 s3 s4 HEX(s1)
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`CONCAT(s1)` varchar(255) CHARACTER SET utf16le DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1, t2;
#
# Problem found by Roy during review
# MY_CS_BINSORT was not set for utf16le_bin,
# so filesort did not work well
#
SET NAMES utf8, @@collation_connection=utf16le_bin;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) as c LIMIT 0;
ALTER TABLE t1 ADD PRIMARY KEY(c);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` varchar(10) CHARACTER SET utf16le COLLATE utf16le_bin NOT NULL DEFAULT '',
PRIMARY KEY (`c`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES ('abc'),('zyx'),('acb');
SELECT UPPER(c) FROM t1 ORDER BY 1 DESC;
UPPER(c)
ZYX
ACB
ABC
DROP TABLE t1;
#
# End of 5.6 tests
#
......@@ -919,6 +919,182 @@ Warning 1292 Truncated incorrect DOUBLE value: '1.2xxx'
select left('aaa','1');
left('aaa','1')
a
#
# Testing cs->cset->strtoll10()
#
SELECT @@character_set_connection;
@@character_set_connection
utf32
SELECT CAST('1' AS UNSIGNED);
CAST('1' AS UNSIGNED)
1
SELECT CAST('12' AS UNSIGNED);
CAST('12' AS UNSIGNED)
12
SELECT CAST('123' AS UNSIGNED);
CAST('123' AS UNSIGNED)
123
SELECT CAST('1234' AS UNSIGNED);
CAST('1234' AS UNSIGNED)
1234
SELECT CAST('12345' AS UNSIGNED);
CAST('12345' AS UNSIGNED)
12345
SELECT CAST('123456' AS UNSIGNED);
CAST('123456' AS UNSIGNED)
123456
SELECT CAST('1234567' AS UNSIGNED);
CAST('1234567' AS UNSIGNED)
1234567
SELECT CAST('12345678' AS UNSIGNED);
CAST('12345678' AS UNSIGNED)
12345678
SELECT CAST('123456789' AS UNSIGNED);
CAST('123456789' AS UNSIGNED)
123456789
SELECT CAST('1234567891' AS UNSIGNED);
CAST('1234567891' AS UNSIGNED)
1234567891
SELECT CAST('12345678912' AS UNSIGNED);
CAST('12345678912' AS UNSIGNED)
12345678912
SELECT CAST('123456789123' AS UNSIGNED);
CAST('123456789123' AS UNSIGNED)
123456789123
SELECT CAST('1234567891234' AS UNSIGNED);
CAST('1234567891234' AS UNSIGNED)
1234567891234
SELECT CAST('12345678912345' AS UNSIGNED);
CAST('12345678912345' AS UNSIGNED)
12345678912345
SELECT CAST('123456789123456' AS UNSIGNED);
CAST('123456789123456' AS UNSIGNED)
123456789123456
SELECT CAST('1234567891234567' AS UNSIGNED);
CAST('1234567891234567' AS UNSIGNED)
1234567891234567
SELECT CAST('12345678912345678' AS UNSIGNED);
CAST('12345678912345678' AS UNSIGNED)
12345678912345678
SELECT CAST('123456789123456789' AS UNSIGNED);
CAST('123456789123456789' AS UNSIGNED)
123456789123456789
SELECT CAST('1234567891234567891' AS UNSIGNED);
CAST('1234567891234567891' AS UNSIGNED)
1234567891234567891
SELECT CAST('12345678912345678912' AS UNSIGNED);
CAST('12345678912345678912' AS UNSIGNED)
12345678912345678912
SELECT CAST('123456789123456789123' AS UNSIGNED);
CAST('123456789123456789123' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123'
SELECT CAST('1x' AS UNSIGNED);
CAST('1x' AS UNSIGNED)
1
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1x'
SELECT CAST('12x' AS UNSIGNED);
CAST('12x' AS UNSIGNED)
12
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12x'
SELECT CAST('123x' AS UNSIGNED);
CAST('123x' AS UNSIGNED)
123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123x'
SELECT CAST('1234x' AS UNSIGNED);
CAST('1234x' AS UNSIGNED)
1234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234x'
SELECT CAST('12345x' AS UNSIGNED);
CAST('12345x' AS UNSIGNED)
12345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345x'
SELECT CAST('123456x' AS UNSIGNED);
CAST('123456x' AS UNSIGNED)
123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456x'
SELECT CAST('1234567x' AS UNSIGNED);
CAST('1234567x' AS UNSIGNED)
1234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567x'
SELECT CAST('12345678x' AS UNSIGNED);
CAST('12345678x' AS UNSIGNED)
12345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678x'
SELECT CAST('123456789x' AS UNSIGNED);
CAST('123456789x' AS UNSIGNED)
123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789x'
SELECT CAST('1234567891x' AS UNSIGNED);
CAST('1234567891x' AS UNSIGNED)
1234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891x'
SELECT CAST('12345678912x' AS UNSIGNED);
CAST('12345678912x' AS UNSIGNED)
12345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912x'
SELECT CAST('123456789123x' AS UNSIGNED);
CAST('123456789123x' AS UNSIGNED)
123456789123
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123x'
SELECT CAST('1234567891234x' AS UNSIGNED);
CAST('1234567891234x' AS UNSIGNED)
1234567891234
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234x'
SELECT CAST('12345678912345x' AS UNSIGNED);
CAST('12345678912345x' AS UNSIGNED)
12345678912345
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345x'
SELECT CAST('123456789123456x' AS UNSIGNED);
CAST('123456789123456x' AS UNSIGNED)
123456789123456
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456x'
SELECT CAST('1234567891234567x' AS UNSIGNED);
CAST('1234567891234567x' AS UNSIGNED)
1234567891234567
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567x'
SELECT CAST('12345678912345678x' AS UNSIGNED);
CAST('12345678912345678x' AS UNSIGNED)
12345678912345678
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678x'
SELECT CAST('123456789123456789x' AS UNSIGNED);
CAST('123456789123456789x' AS UNSIGNED)
123456789123456789
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789x'
SELECT CAST('1234567891234567891x' AS UNSIGNED);
CAST('1234567891234567891x' AS UNSIGNED)
1234567891234567891
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '1234567891234567891x'
SELECT CAST('12345678912345678912x' AS UNSIGNED);
CAST('12345678912345678912x' AS UNSIGNED)
12345678912345678912
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '12345678912345678912x'
SELECT CAST('123456789123456789123x' AS UNSIGNED);
CAST('123456789123456789123x' AS UNSIGNED)
18446744073709551615
Warnings:
Warning 1292 Truncated incorrect INTEGER value: '123456789123456789123x'
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
......@@ -1126,6 +1302,20 @@ my_col
00
DROP TABLE t1;
#
# Bug#32859 Character sets: no warning with non-fitting chariot wheel
#
CREATE TABLE t1 (utf32 CHAR(5) CHARACTER SET utf32, latin1 CHAR(5) CHARACTER SET latin1);
INSERT INTO t1 (utf32) VALUES (0xc581);
UPDATE t1 SET latin1 = utf32;
Warnings:
Warning 1366 Incorrect string value: '\x00\x00\xC5\x81' for column 'latin1' at row 1
DELETE FROM t1;
INSERT INTO t1 (utf32) VALUES (0x100cc);
UPDATE t1 SET latin1 = utf32;
Warnings:
Warning 1366 Incorrect string value: '\x00\x01\x00\xCC' for column 'latin1' at row 1
DROP TABLE t1;
#
# Bug#55912 FORMAT with locale set fails for numbers < 1000
#
SET collation_connection=utf32_general_ci;
......
......@@ -430,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
39
40
'#--------------------FN_DYNVARS_010_10-------------------------#'
SET @@character_set_client = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
39
40
'#--------------------FN_DYNVARS_011_10-------------------------#'
SET @@character_set_connection = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
39
40
'#--------------------FN_DYNVARS_012_10-------------------------#'
SET @@character_set_database = "grek";
ERROR 42000: Unknown character set: 'grek'
......
......@@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
39
40
'#--------------------FN_DYNVARS_008_10-------------------------#'
SET @@character_set_filesystem = abc;
ERROR 42000: Unknown character set: 'abc'
......
......@@ -839,3 +839,20 @@ DROP TABLE t1;
--echo #
--echo # End of 5.5 tests
--echo #
--echo #
--echo # Start of 5.6 tests
--echo #
--echo #
--echo # Bug#59145 valgrind warnings for uninitialized values in my_strtoll10_mb2
--echo #
SET NAMES latin1;
SELECT CONVERT(CHAR(NULL USING ucs2), UNSIGNED);
DO IFNULL(CHAR(NULL USING ucs2), '');
DO CAST(CONVERT('' USING ucs2) AS UNSIGNED);
--echo #
--echo # End of 5.6 tests
--echo #
......@@ -653,6 +653,7 @@ select 1.1 + '1.2xxx';
# Testing strntoll10_utf16
# Testing cs->cset->strtoll10
select left('aaa','1');
--source include/ctype_strtoll10.inc
#
# Testing cs->cset->strntoull10rnd
......
-- source include/have_ucs2.inc
-- source include/have_utf16.inc
-- source include/have_utf32.inc
-- source include/have_utf8mb4.inc
SET TIME_ZONE='+03:00';
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.6 tests
--echo #
SET NAMES utf8mb4, collation_connection=utf16le_general_ci;
SELECT HEX('a'), HEX('a ');
-- source include/endspace.inc
--echo #
--echo # Check that incomplete utf16le characters in HEX notation
--echo # are left-padded with zeros
--echo #
SELECT HEX(_utf16le 0x44);
SELECT HEX(_utf16le 0x3344);
SELECT HEX(_utf16le 0x113344);
--echo #
--echo # Check that 0x20 is only trimmed when it is
--echo # a part of real SPACE character, not just a part
--echo # of a multibyte sequence.
--echo # Note, CYRILLIC LETTER ER is used as an example, which
--echo # is stored as 0x0420 in utf16le, thus contains 0x20 in the
--echo # low byte. The second character is THREE-PER-M, U+2004,
--echo # which contains 0x20 in the high byte.
--echo #
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (_ucs2 X'2004',_ucs2 X'2004');
SELECT HEX(word) FROM t1 ORDER BY word;
SELECT HEX(word2) FROM t1 ORDER BY word2;
DELETE FROM t1;
--echo #
--echo # Check that real spaces are correctly trimmed.
--echo #
INSERT INTO t1 VALUES (_ucs2 X'042000200020', _ucs2 X'042000200020');
INSERT INTO t1 VALUES (_ucs2 X'200400200020', _ucs2 X'200400200020');
SELECT HEX(word) FROM t1 ORDER BY word;
SELECT HEX(word2) FROM t1 ORDER BY word2;
DROP TABLE t1;
--echo #
--echo # Check LPAD/RPAD
--echo #
CREATE TABLE t1 (a VARCHAR(10), pad INT, b VARCHAR(10)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'0421');
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'04210422');
INSERT INTO t1 VALUES (_ucs2 X'0420', 10, _ucs2 X'042104220423');
INSERT INTO t1 VALUES (_ucs2 X'0420042104220423042404250426042704280429042A042B',10,_ucs2 X'042104220423');
INSERT INTO t1 VALUES (_utf32 X'010000', 10, _ucs2 X'0421');
INSERT INTO t1 VALUES (_ucs2 X'0421', 10, _utf32 X'010000');
SELECT a, pad, b, LPAD(a, pad, b), HEX(LPAD(a, pad, b)) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 SELECT
LPAD(_utf16le X'2004',10,_utf16le X'2104') l,
RPAD(_utf16le X'2004',10,_utf16le X'2104') r;
SHOW CREATE TABLE t1;
SELECT HEX(l), HEX(r) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (f1 CHAR(30));
INSERT INTO t1 VALUES ("103000"), ("22720000"), ("3401200"), ("78000");
SELECT LPAD(f1, 12, "-o-/") FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing LIKE
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
--source include/ctype_like.inc
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16le_bin ORDER BY BINARY a;
DROP TABLE t1;
CREATE TABLE t1 (word VARCHAR(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf16le;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025'; # "c%"
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F'; # "ca_"
DROP TABLE t1;
--echo #
--echo # Check that INSERT() works fine.
--echo # This invokes charpos() function.
--echo #
CREATE TABLE t1 (
a VARCHAR(10) CHARACTER SET utf16le,
b VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ('abc', 'def');
SELECT INSERT(a, 10, 2, b) FROM t1;
SELECT INSERT(a, 1, 2, b) FROM t1;
DROP TABLE t1;
--echo #
--echo # Bug#1264
--echo #
# Description:
#
# When using a ucs2 table in MySQL,
# with either the ucs2_general_ci or the ucs2_bin collation,
# words are returned in an incorrect order when using ORDER BY
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
# the longest word *first* instead of last, i.e. the word "aardvark"
# appears in the results before the word "a".
#
# If there is no index for the column, the problem does not occur.
#
# Interestingly, if there is no second column, the words are returned
# in the correct order.
#
# According to EXPLAIN, it looks like when the output includes columns that
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
SET NAMES utf8, collation_connection=utf16le_general_ci;
--echo #
--echo # Two fields, index
--echo #
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) DEFAULT 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER by word;
DROP TABLE t1;
--echo #
--echo # One field, index
--echo #
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
SELECT * FROM t1 ORDER BY word;
DROP TABLE t1;
--echo #
--echo # Two fields, no index
--echo #
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf16le
COLLATE utf16le_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER BY word;
DROP TABLE t1;
--echo #
--echo # END OF Bug 1264 test
--echo #
--echo #
--echo # Check alignment for from-binary-conversion with CAST and CONVERT
--echo #
SELECT HEX(CAST(0xAA as char CHARACTER SET utf16le));
SELECT HEX(CONVERT(0xAA USING utf16le));
--echo #
--echo # Check alignment for string types
--echo #
CREATE TABLE t1 (a CHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a TEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a MEDIUMTEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a LONGTEXT CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Bug#5081 : UCS2 fields are filled with '0x2020'
--echo # after extending field length
--echo #
CREATE TABLE t1(a CHAR(1)) DEFAULT CHARSET utf16le;
INSERT INTO t1 VALUES ('a'),('b'),('c');
ALTER TABLE t1 MODIFY a CHAR(5);
SELECT a, HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Check prepare statement from an UTF16 string
--echo #
SET NAMES latin1;
SET @ivar= 1234;
SET @str1 = 'SELECT ?';
SET @str2 = CONVERT(@str1 USING utf16le);
PREPARE stmt1 FROM @str2;
EXECUTE stmt1 USING @ivar;
--echo #
--echo # Check that utf16le works with ENUM and SET type
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
CREATE TABLE t1 (a ENUM('x','y','z') CHARACTER SET utf16le);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('x');
INSERT INTO t1 VALUES ('y');
INSERT INTO t1 VALUES ('z');
SELECT a, HEX(a) FROM t1 ORDER BY a;
ALTER TABLE t1 CHANGE a a ENUM('x','y','z','d','e','ä','ö','ü') CHARACTER SET utf16le;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('D');
INSERT INTO t1 VALUES ('E ');
INSERT INTO t1 VALUES ('ä');
INSERT INTO t1 VALUES ('ö');
INSERT INTO t1 VALUES ('ü');
SELECT a, HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
CREATE TABLE t1 (a set ('x','y','z','ä','ö','ü') CHARACTER SET utf16le);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('x');
INSERT INTO t1 VALUES ('y');
INSERT INTO t1 VALUES ('z');
INSERT INTO t1 VALUES ('x,y');
INSERT INTO t1 VALUES ('x,y,z,ä,ö,ü');
SELECT a, HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
--echo #
--echo # Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
--echo #
CREATE TABLE t1(a ENUM('a','b','c')) DEFAULT CHARACTER SET utf16le;
INSERT INTO t1 VALUES('a'),('b'),('c');
ALTER TABLE t1 ADD b CHAR(1);
SHOW WARNINGS;
SELECT * FROM t1 ORDER BY a;
DROP TABLE t1;
SET NAMES utf8, collation_connection='utf16le_general_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
SET NAMES utf8, collation_connection='utf16le_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_filesort2.inc
-- source include/ctype_like_escape.inc
--echo #
--echo # Bug#10344 Some string functions fail for UCS2
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le, pos INT);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,1);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,2);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,3);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-1);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-2);
INSERT INTO t1 VALUES (_ucs2 0x00e400e50068,-3);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 1);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 2);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, 3);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -1);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -2);
INSERT INTO t1 VALUES (_utf32 0x000000e4000000e500010000, -3);
SELECT HEX(SUBSTR(a, pos)), SUBSTR(a, pos) FROM t1;
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf16le_general_ci;
--echo #
--echo # Bug#9442 Set parameter make query fail if column CHARACTER SET is UCS2
--echo #
CREATE TABLE t1 (utext VARCHAR(20) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ("lily");
INSERT INTO t1 VALUES ("river");
PREPARE stmt FROM 'SELECT utext FROM t1 where utext like ?';
SET @param1='%%';
EXECUTE stmt USING @param1;
EXECUTE stmt USING @param1;
SELECT utext FROM t1 where utext like '%%';
DROP TABLE t1;
DEALLOCATE PREPARE stmt;
--echo #
--echo # Bug#22052 Trailing spaces are not removed FROM UNICODE fields in an index
--echo #
CREATE TABLE t1 (
a CHAR(10) CHARACTER SET utf16le NOT NULL,
INDEX a (a)
) engine=myisam;
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x201f, 10));
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x2020, 10));
INSERT INTO t1 VALUES (REPEAT(_ucs2 0x2021, 10));
--echo # make sure "index read" is used
explain SELECT HEX(a) FROM t1 ORDER BY a;
SELECT HEX(a) FROM t1 ORDER BY a;
ALTER TABLE t1 DROP INDEX a;
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;
--echo #
--echo # Bug #20108: corrupted default enum value for a ucs2 field
--echo #
CREATE TABLE t1 (
status ENUM('active','passive') CHARACTER SET utf16le COLLATE utf16le_general_ci
NOT NULL DEFAULT 'passive'
);
SHOW CREATE TABLE t1;
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
DROP TABLE t1;
--echo #
--echo # Conversion FROM an UTF16LE string to a decimal column
--echo #
CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET utf16le, b DECIMAL(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
UPDATE t1 set b=a;
SELECT *, HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Bug#9442 Set parameter make query fail if column CHARACTER SET is UCS2
--echo #
CREATE TABLE t1 (utext VARCHAR(20) CHARACTER SET utf16le);
INSERT INTO t1 VALUES ("lily");
INSERT INTO t1 VALUES ("river");
PREPARE stmt FROM 'SELECT utext FROM t1 where utext like ?';
SET @param1='%%';
EXECUTE stmt USING @param1;
EXECUTE stmt USING @param1;
SELECT utext FROM t1 where utext like '%%';
DROP TABLE t1;
DEALLOCATE PREPARE stmt;
--echo #
--echo # Bug#22638 SOUNDEX broken for international characters
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
SELECT SOUNDEX(''),SOUNDEX('he'),SOUNDEX('hello all folks'),SOUNDEX('#3556 in bugdb');
SELECT HEX(SOUNDEX('')),HEX(SOUNDEX('he')),HEX(SOUNDEX('hello all folks')),HEX(SOUNDEX('#3556 in bugdb'));
SELECT 'mood' sounds like 'mud';
--echo # Cyrillic A, BE, VE
SELECT HEX(SOUNDEX(_utf16le 0x041004110412));
--echo # Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
SELECT HEX(SOUNDEX(_utf16le 0x00BF00C0));
--echo #
--echo # Bug#14290: character_maximum_length for text fields
--echo #
CREATE TABLE t1(a BLOB, b TEXT CHARSET utf16le);
SELECT data_type, character_octet_length, character_maximum_length
FROM information_schema.columns where table_name='t1';
DROP TABLE t1;
SET NAMES utf8, collation_connection=utf16le_general_ci;
--echo #
--echo # Testing cs->coll->instr()
--echo #
SELECT POSITION('bb' IN 'abba');
--echo #
--echo # Testing cs->coll->hash_sort()
--echo #
SET NAMES utf8, collation_connection=utf16le_bin;
--source include/ctype_heap.inc
SET NAMES utf8, collation_connection=utf16le_general_ci;
--source include/ctype_heap.inc
--echo #
--echo # Testing cs->cset->numchars()
--echo #
SELECT CHAR_LENGTH('abcd'), OCTET_LENGTH('abcd');
SELECT CHAR_LENGTH(_utf16le 0x00D800DC), OCTET_LENGTH(_utf16le 0x00D800DC);
SELECT CHAR_LENGTH(_utf16le 0x7DD8FFDF), OCTET_LENGTH(_utf16le 0x7FD8DDDF);
--echo #
--echo # Testing cs->cset->charpos()
--echo #
SELECT LEFT('abcd',2);
SELECT HEX(LEFT(_utf16le 0x00D800DC7FD8FFDF, 1));
SELECT HEX(RIGHT(_utf16le 0x00D800DC7FD8FFDF, 1));
--echo #
--echo # Testing cs->cset->well_formed_length()
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
--echo # Bad sequences
--error ER_INVALID_CHARACTER_STRING
INSERT INTO t1 VALUES (_utf16le 0x00D8);
--error ER_INVALID_CHARACTER_STRING
INSERT INTO t1 VALUES (_utf16le 0x00DC);
--error ER_INVALID_CHARACTER_STRING
INSERT INTO t1 VALUES (_utf16le 0x00D800D8);
--error ER_INVALID_CHARACTER_STRING
INSERT INTO t1 VALUES (_utf16le 0x00D800E8);
--error ER_INVALID_CHARACTER_STRING
INSERT INTO t1 VALUES (_utf16le 0x00D80008);
--echo # Good sequences
INSERT INTO t1 VALUES (_utf16le 0x00D800DC);
INSERT INTO t1 VALUES (_utf16le 0x00D8FFDC);
INSERT INTO t1 VALUES (_utf16le 0xFFDB00DC);
INSERT INTO t1 VALUES (_utf16le 0xFFDBFFDC);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Bug#32393 Character sets: illegal characters in utf16le columns
--echo #
--echo # Tests that cs->cset->wc_mb() doesn't accept surrogate parts
--echo #
--echo # via ALTER
--echo #
CREATE TABLE t1 (s1 VARCHAR(50) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (0xDF84);
ALTER TABLE t1 MODIFY column s1 VARCHAR(50) CHARACTER SET utf16le;
SELECT HEX(s1) FROM t1;
DROP TABLE t1;
--echo #
--echo # via UPDATE
--echo #
CREATE TABLE t1 (s1 VARCHAR(5) CHARACTER SET ucs2, s2 VARCHAR(5) CHARACTER SET utf16le);
INSERT INTO t1 (s1) VALUES (0xdf84);
UPDATE t1 set s2 = s1;
SELECT HEX(s2) FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->lengthsp()
--echo #
CREATE TABLE t1 (a CHAR(10)) CHARACTER SET utf16le;
INSERT INTO t1 VALUES ('a ');
SELECT HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->caseup() and cs->cset->casedn()
--echo #
SELECT UPPER('abcd'), LOWER('ABCD');
--echo #
--echo # Checking str_to_datetime()
--echo #
select @@collation_connection;
CREATE TABLE t1 (a date);
INSERT INTO t1 VALUES ('2007-09-16');
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->ll10tostr
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (123456);
SELECT a, HEX(a) FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->fill
--echo # SOUNDEX fills strings with DIGIT ZERO up to four characters
--echo #
SELECT SOUNDEX('a'), HEX(SOUNDEX('a'));
--echo #
--echo # Testing cs->cset->strntoul
--echo #
CREATE TABLE t1 (a enum ('a','b','c')) CHARACTER SET utf16le;
INSERT INTO t1 VALUES ('1');
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->strntoll and cs->cset->strntoull
--echo #
SET NAMES latin1;
SELECT HEX(CONV(CONVERT('123' USING utf16le), -10, 16));
SELECT HEX(CONV(CONVERT('123' USING utf16le), 10, 16));
SET NAMES utf8, collation_connection=utf16le_general_ci;
--echo #
--echo # Testing cs->cset->strntod
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
SELECT 1.1 + '1.2';
SELECT 1.1 + '1.2xxx';
--echo #
--echo # Testing cs->cset->strtoll10
--echo #
SELECT LEFT('aaa','1');
CREATE TABLE t1 AS SELECT REPEAT('abcd', 128) AS a;
SELECT LEFT(a, '2') FROM t1;
SELECT LEFT(a, ' \t \t 2') FROM t1;
SELECT LEFT(a, ' \t \t +2') FROM t1;
SELECT SUBSTR(a, '-2') FROM t1;
SELECT SUBSTR(a, ' \t \t -2') FROM t1;
SELECT LEFT(a, '00002') FROM t1;
SELECT LEFT(a, ' \t \t 00002') FROM t1;
SELECT LEFT(a, ' \t \t +00002') FROM t1;
SELECT SUBSTR(a, '-00002') FROM t1;
SELECT SUBSTR(a, ' \t \t -00002') FROM t1;
DROP TABLE t1;
CREATE TABLE t1 AS SELECT REPEAT('abcd', 128) AS a LIMIT 0;
INSERT INTO t1 VALUES ('255'), ('65535'),('16777215'),('4294967295'),
('1099511627775'),('281474976710655'),('72057594037927935'),
('1844674407370955161'),('18446744073709551614'), ('18446744073709551615');
SELECT a, CAST(a AS SIGNED), CAST(a AS UNSIGNED) FROM t1;
UPDATE t1 SET a=CONCAT('-', a);
SELECT a, CAST(a AS SIGNED) FROM t1;
DROP TABLE t1;
--source include/ctype_strtoll10.inc
--echo #
--echo # Testing cs->cset->strntoull10rnd
--echo #
CREATE TABLE t1 (a int);
INSERT INTO t1 VALUES ('-1234.1e2');
INSERT INTO t1 VALUES ('-1234.1e2xxxx');
INSERT INTO t1 VALUES ('-1234.1e2 ');
INSERT INTO t1 VALUES ('123');
INSERT INTO t1 VALUES ('-124');
INSERT INTO t1 VALUES ('+125');
INSERT INTO t1 VALUES (' \t \t 123');
INSERT INTO t1 VALUES (' \t \t -124');
INSERT INTO t1 VALUES (' \t \t +125');
INSERT INTO t1 VALUES (' \t \t 000123');
INSERT INTO t1 VALUES (' \t \t -000124');
INSERT INTO t1 VALUES (' \t \t +000125');
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a BIGINT UNSIGNED);
INSERT INTO t1 VALUES ('255'), ('65535'),('16777215'),('4294967295'),
('1099511627775'),('281474976710655'),('72057594037927935'),
('1844674407370955161'),('18446744073709551614'), ('18446744073709551615');
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing cs->cset->scan
--echo #
CREATE TABLE t1 (a int);
INSERT INTO t1 VALUES ('1 ');
INSERT INTO t1 VALUES ('1 x');
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # Testing auto-conversion to TEXT
--echo #
CREATE TABLE t1 (a VARCHAR(17000) CHARACTER SET utf16le);
SHOW CREATE TABLE t1;
DROP TABLE t1;
--echo #
--echo # Testing that maximim possible key length is 1000 bytes
--echo #
CREATE TABLE t1 (a VARCHAR(250) CHARACTER SET utf16le PRIMARY KEY);
SHOW CREATE TABLE t1;
DROP TABLE t1;
--error ER_TOO_LONG_KEY
CREATE TABLE t1 (a VARCHAR(334) CHARACTER SET utf16le PRIMARY KEY);
--echo #
--echo # Conversion to utf8
--echo #
CREATE TABLE t1 (a CHAR(1) CHARACTER SET utf16le);
INSERT INTO t1 VALUES (0x00D800DC),(0x00D8FFDC),(0x7FDB00DC),(0x7FDBFFDC);
INSERT INTO t1 VALUES (0xC000), (0xFF00),(0x00E0), (0xFFFF);
SELECT HEX(a), HEX(@a:=CONVERT(a USING utf8mb4)), HEX(CONVERT(@a USING utf16le)) FROM t1;
DROP TABLE t1;
--echo #
--echo # Test basic regex functionality
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
--source include/ctype_regex.inc
--echo #
--echo # Test how CHARACTER SET works with date/time
--echo #
--source include/ctype_datetime.inc
--echo #
--echo # Bug#33073 Character sets: ordering fails with utf32
--echo #
SET NAMES utf8, collation_connection=utf16le_general_ci;
CREATE TABLE t1 AS SELECT REPEAT('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
SELECT * FROM t1 ORDER BY s1;
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
DROP TABLE t1;
SET max_sort_length=DEFAULT;
--echo #
--echo # Bug#52520 Difference in tinytext utf column metadata
--echo #
CREATE TABLE t1 (
s1 TINYTEXT CHARACTER SET utf16le,
s2 TEXT CHARACTER SET utf16le,
s3 MEDIUMTEXT CHARACTER SET utf16le,
s4 LONGTEXT CHARACTER SET utf16le
);
--enable_metadata
SET NAMES utf8, @@character_set_results=NULL;
SELECT *, HEX(s1) FROM t1;
SET NAMES latin1;
SELECT *, HEX(s1) FROM t1;
SET NAMES utf8;
SELECT *, HEX(s1) FROM t1;
--disable_metadata
CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1;
SHOW CREATE TABLE t2;
DROP TABLE t1, t2;
--echo #
--echo # Problem found by Roy during review
--echo # MY_CS_BINSORT was not set for utf16le_bin,
--echo # so filesort did not work well
--echo #
SET NAMES utf8, @@collation_connection=utf16le_bin;
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) as c LIMIT 0;
# the problem in fact reproduced even without the primary key:
ALTER TABLE t1 ADD PRIMARY KEY(c);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('abc'),('zyx'),('acb');
SELECT UPPER(c) FROM t1 ORDER BY 1 DESC;
DROP TABLE t1;
--echo #
--echo # End of 5.6 tests
--echo #
......@@ -692,6 +692,7 @@ select 1.1 + '1.2xxx';
# Testing strntoll10_utf32
# Testing cs->cset->strtoll10
select left('aaa','1');
--source include/ctype_strtoll10.inc
#
# Testing cs->cset->strntoull10rnd
......@@ -812,6 +813,17 @@ CREATE TABLE t1 AS SELECT HEX(0x00) AS my_col;
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # Bug#32859 Character sets: no warning with non-fitting chariot wheel
--echo #
CREATE TABLE t1 (utf32 CHAR(5) CHARACTER SET utf32, latin1 CHAR(5) CHARACTER SET latin1);
INSERT INTO t1 (utf32) VALUES (0xc581);
UPDATE t1 SET latin1 = utf32;
DELETE FROM t1;
INSERT INTO t1 (utf32) VALUES (0x100cc);
UPDATE t1 SET latin1 = utf32;
DROP TABLE t1;
--echo #
--echo # Bug#55912 FORMAT with locale set fails for numbers < 1000
--echo #
......
......@@ -304,6 +304,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf16
add_compiled_collation(&my_charset_utf16_general_ci);
add_compiled_collation(&my_charset_utf16_bin);
add_compiled_collation(&my_charset_utf16le_general_ci);
add_compiled_collation(&my_charset_utf16le_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf16_unicode_ci);
add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
......
......@@ -1027,13 +1027,11 @@ longlong Item_func_signed::val_int_from_str(int *error)
value= cs->cset->strtoll10(cs, start, &end, error);
if (*error > 0 || end != start+ length)
{
char err_buff[128];
String err_tmp(err_buff,(uint32) sizeof(err_buff), system_charset_info);
err_tmp.copy(start, length, system_charset_info);
ErrConvString err(res);
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_TRUNCATED_WRONG_VALUE,
ER(ER_TRUNCATED_WRONG_VALUE), "INTEGER",
err_tmp.c_ptr());
err.ptr());
}
return value;
}
......
......@@ -509,16 +509,19 @@ static bool check_charset(sys_var *self, THD *thd, set_var *var)
if (var->value->result_type() == STRING_RESULT)
{
String str(buff, sizeof(buff), system_charset_info), *res;
if (!(res=var->value->val_str(&str)))
if (!(res= var->value->val_str(&str)))
var->save_result.ptr= NULL;
else if (!(var->save_result.ptr= get_charset_by_csname(res->c_ptr(),
MY_CS_PRIMARY,
MYF(0))) &&
!(var->save_result.ptr=get_old_charset_by_name(res->c_ptr())))
else
{
ErrConvString err(res);
my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), err.ptr());
return true;
ErrConvString err(res); /* Get utf8 '\0' terminated string */
if (!(var->save_result.ptr= get_charset_by_csname(err.ptr(),
MY_CS_PRIMARY,
MYF(0))) &&
!(var->save_result.ptr= get_old_charset_by_name(err.ptr())))
{
my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), err.ptr());
return true;
}
}
}
else // INT_RESULT
......@@ -629,11 +632,14 @@ static bool check_collation_not_null(sys_var *self, THD *thd, set_var *var)
String str(buff, sizeof(buff), system_charset_info), *res;
if (!(res= var->value->val_str(&str)))
var->save_result.ptr= NULL;
else if (!(var->save_result.ptr= get_charset_by_name(res->c_ptr(), MYF(0))))
else
{
ErrConvString err(res);
my_error(ER_UNKNOWN_COLLATION, MYF(0), err.ptr());
return true;
ErrConvString err(res); /* Get utf8 '\0'-terminated string */
if (!(var->save_result.ptr= get_charset_by_name(err.ptr(), MYF(0))))
{
my_error(ER_UNKNOWN_COLLATION, MYF(0), err.ptr());
return true;
}
}
}
else // INT_RESULT
......
......@@ -380,7 +380,10 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
if (str >= end)
return MY_CS_TOOSMALL;
pl= uni_to_cs[(wc>>8) & 0xFF];
if (wc > 0xFFFF)
return MY_CS_ILUNI;
pl= uni_to_cs[wc >> 8];
str[0]= pl ? pl[wc & 0xFF] : '\0';
return (!str[0] && wc) ? MY_CS_ILUNI : 1;
}
......
......@@ -101,6 +101,7 @@ my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
int overflow;
int cnv;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
register unsigned int cutlim;
register uint32 cutoff;
register uint32 res;
......@@ -111,7 +112,7 @@ my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
*err= 0;
do
{
if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
switch (wc)
{
......@@ -141,7 +142,7 @@ bs:
cutlim= (uint) (((uint32)~0L) % (uint32) base);
do {
if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
s+= cnv;
if (wc >= '0' && wc <= '9')
......@@ -212,6 +213,7 @@ my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
int overflow;
int cnv;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
register unsigned int cutlim;
register uint32 cutoff;
register uint32 res;
......@@ -222,7 +224,7 @@ my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
*err= 0;
do
{
if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
switch (wc)
{
......@@ -253,7 +255,7 @@ bs:
do
{
if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
s+= cnv;
if (wc >= '0' && wc <= '9')
......@@ -316,6 +318,7 @@ my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
int overflow;
int cnv;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
register ulonglong cutoff;
register unsigned int cutlim;
register ulonglong res;
......@@ -326,7 +329,7 @@ my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
*err= 0;
do
{
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
switch (wc)
{
......@@ -356,7 +359,7 @@ bs:
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
do {
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
s+=cnv;
if ( wc>='0' && wc<='9')
......@@ -427,6 +430,7 @@ my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
int overflow;
int cnv;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
register ulonglong cutoff;
register unsigned int cutlim;
register ulonglong res;
......@@ -437,7 +441,7 @@ my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
*err= 0;
do
{
if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
switch (wc)
{
......@@ -468,7 +472,7 @@ bs:
do
{
if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
{
s+=cnv;
if ( wc>='0' && wc<='9')
......@@ -533,6 +537,7 @@ my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
register const uchar *s= (const uchar*) nptr;
const uchar *end;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int cnv;
*err= 0;
......@@ -541,7 +546,7 @@ my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
length= sizeof(buf) - 1;
end= s + length;
while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
{
s+= cnv;
if (wc > (int) (uchar) 'e' || !wc)
......@@ -566,6 +571,7 @@ my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
ulonglong res;
const uchar *end, *s= (const uchar*) nptr;
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int cnv;
/* Cut too long strings */
......@@ -573,7 +579,7 @@ my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
length= sizeof(buf)-1;
end= s + length;
while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
{
s+= cnv;
if (wc > (int) (uchar) 'e' || !wc)
......@@ -712,23 +718,36 @@ static longlong
my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
const char *nptr, char **endptr, int *error)
{
const char *s, *end, *start, *n_end, *true_end;
const uchar *s, *end, *start, *n_end, *true_end;
uchar c;
unsigned long i, j, k;
ulonglong li;
int negative;
ulong cutoff, cutoff2, cutoff3;
my_wc_t wc;
int res;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
s= nptr;
s= (const uchar *) nptr;
/* If fixed length string */
if (endptr)
{
/* Make sure string length is even */
end= s + ((*endptr - s) / 2) * 2;
while (s < end && !s[0] && (s[1] == ' ' || s[1] == '\t'))
s+= 2;
if (s == end)
goto no_conv;
/*
Make sure string length is even.
Odd length indicates a bug in the caller.
Assert in debug, round in production.
*/
DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0);
end= s + ((*endptr - (const char*) s) / 2) * 2;
for ( ; ; ) /* Skip leading spaces and tabs */
{
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
s+= res;
if (wc != ' ' && wc != '\t')
break;
}
}
else
{
......@@ -738,13 +757,13 @@ my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
/* Check for a sign. */
negative= 0;
if (!s[0] && s[1] == '-')
if (wc == '-')
{
*error= -1; /* Mark as negative number */
negative= 1;
s+= 2;
if (s == end)
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
s+= res; /* wc is now expected to hold the first digit. */
cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
cutoff3= MAX_NEGATIVE_NUMBER % 100;
......@@ -752,46 +771,53 @@ my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
else
{
*error= 0;
if (!s[0] && s[1] == '+')
if (wc == '+')
{
s+= 2;
if (s == end)
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
s+= res; /* wc is now expected to hold the first digit. */
}
cutoff= ULONGLONG_MAX / LFACTOR2;
cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
cutoff3= ULONGLONG_MAX % 100;
}
/* Handle case where we have a lot of pre-zero */
if (!s[0] && s[1] == '0')
/*
The code below assumes that 'wc' holds the first digit
and 's' points to the next character after it.
Scan pre-zeros if any.
*/
if (wc == '0')
{
i= 0;
do
for ( ; ; s+= res)
{
s+= 2;
if (s == end)
goto end_i; /* Return 0 */
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
if (wc != '0')
break;
}
while (!s[0] && s[1] == '0');
n_end= s + 2 * INIT_CNT;
}
else
{
/* Read first digit to check that it's a valid number */
if (s[0] || (c= (s[1]-'0')) > 9)
if ((i= (wc - '0')) > 9)
goto no_conv;
i= c;
s+= 2;
n_end= s + 2 * (INIT_CNT-1);
}
/* Handle first 9 digits and store them in i */
if (n_end > end)
n_end= end;
for (; s != n_end ; s+= 2)
for ( ; ; s+= res)
{
if (s[0] || (c= (s[1]-'0')) > 9)
if ((res= mb_wc(cs, &wc, s, n_end)) <= 0)
break;
if ((c= (wc - '0')) > 9)
goto end_i;
i= i*10+c;
}
......@@ -806,10 +832,12 @@ my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
n_end= end;
do
{
if (s[0] || (c= (s[1]-'0')) > 9)
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
if ((c= (wc - '0')) > 9)
goto end_i_and_j;
j= j*10+c;
s+= 2;
s+= res;
j= j * 10 + c;
} while (s != n_end);
if (s == end)
{
......@@ -817,20 +845,26 @@ my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
goto end_i_and_j;
goto end3;
}
if (s[0] || (c= (s[1]-'0')) > 9)
goto end3;
/* Handle the next 1 or 2 digits and store them in k */
k=c;
s+= 2;
if (s == end || s[0] || (c= (s[1]-'0')) > 9)
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
if ((k= (wc - '0')) > 9)
goto end3;
s+= res;
if (s == end)
goto end4;
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
goto no_conv;
if ((c= (wc - '0')) > 9)
goto end4;
s+= res;
k= k*10+c;
s+= 2;
*endptr= (char*) s;
/* number string should have ended here */
if (s != end && !s[0] && (c= (s[1]-'0')) <= 9)
if (s != end && mb_wc(cs, &wc, s, end) > 0 && ((uchar) (wc - '0')) <= 9)
goto overflow;
/* Check that we didn't get an overflow with the last digit */
......@@ -882,15 +916,18 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
const char *str, const char *end, int sequence_type)
{
const char *str0= str;
end--; /* for easier loop condition, because of two bytes per character */
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int res;
switch (sequence_type)
{
case MY_SEQ_SPACES:
for ( ; str < end; str+= 2)
for (res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end);
res > 0 && wc == ' ';
str+= res,
res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end))
{
if (str[0] != '\0' || str[1] != ' ')
break;
}
return (size_t) (str - str0);
default:
......@@ -900,11 +937,33 @@ my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
/*
  Fill a buffer with repetitions of one character, encoded with the
  character set's own wc_mb() converter.

  @param cs    Character set; cs->cset->wc_mb is used to encode "fill".
  @param s     Destination buffer.
  @param slen  Size of the destination buffer in bytes.
  @param fill  Code point to fill the buffer with.

  Whole encoded characters are copied while they fit. Any tail that is too
  short to hold another encoded character is zero-filled.
*/
static void
my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill)
{
  char buf[10];
  int buflen;
  size_t chlen;
  DBUG_ASSERT((slen % 2) == 0);

  buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
                          (uchar*) buf + sizeof(buf));
  DBUG_ASSERT(buflen > 0);
  if (buflen <= 0)
  {
    /*
      The fill character cannot be encoded. Without this guard a non-debug
      build would loop forever (buflen == 0) or pass a negative size to
      memcpy(). Zero the buffer instead, as is done for a short tail.
    */
    bzero(s, slen);
    return;
  }

  /*
    Count down the remaining space instead of computing "s + slen - buflen",
    which would point before the buffer when slen < buflen.
  */
  chlen= (size_t) buflen;
  for ( ; slen >= chlen; s+= chlen, slen-= chlen)
    memcpy(s, buf, chlen);

  /*
    If there is some space left which is not enough for a whole
    multi-byte character, pad it with trailing zeros.
  */
  if (slen > 0)
    bzero(s, slen);
}
......@@ -1018,11 +1077,26 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
DB80..DBFF - Private surrogate high (128 pages)
DC00..DFFF - Surrogate low (1024 codes in a page)
*/
/* Boundaries of the UTF-16 surrogate code unit ranges */
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF

/* Test the leading byte of a big-endian code unit: 110110?? / 110111?? */
#define MY_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test a whole 16-bit code unit for being any surrogate (D800..DFFF) */
#define MY_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)

/*
  Combine two bytes (most significant first) into one 16-bit code unit.
  Arguments are parenthesized so that expressions can be passed safely.
*/
#define MY_UTF16_WC2(a, b)       (((a) << 8) + (b))

/*
  Combine the four bytes of a surrogate pair into a code point:
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
  Arguments are parenthesized so that expressions can be passed safely.
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)
static int
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
......@@ -1044,23 +1118,14 @@ my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
return MY_CS_ILSEQ;
/*
s[0]= 110110?? (<< 18)
s[1]= ???????? (<< 10)
s[2]= 110111?? (<< 8)
s[3]= ???????? (<< 0)
*/
*pwc= ((s[0] & 3) << 18) + (s[1] << 10) +
((s[2] & 3) << 8) + s[3] + 0x10000;
*pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
return 4;
}
if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
return MY_CS_ILSEQ;
*pwc= (s[0] << 8) + s[1];
*pwc= MY_UTF16_WC2(s[0], s[1]);
return 2;
}
......@@ -1098,7 +1163,7 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
/*
  Replace *wc with its lower-case counterpart.

  @param uni_plane  Case-info table indexed by the high byte of the code
                    point; an entry may be NULL.
  @param wc         In/out code point.

  Code points whose page is 256 or above, or whose page has no table,
  are left unchanged.
*/
static inline void
my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
{
  /* Unsigned, so a huge *wc can never yield a negative page index */
  uint page= *wc >> 8;
  if (page < 256 && uni_plane[page])
    *wc= uni_plane[page][*wc & 0xFF].tolower;
}
......@@ -1107,7 +1172,7 @@ my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
/*
  Replace *wc with its upper-case counterpart.

  @param uni_plane  Case-info table indexed by the high byte of the code
                    point; an entry may be NULL.
  @param wc         In/out code point.

  Code points whose page is 256 or above, or whose page has no table,
  are left unchanged.
*/
static inline void
my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
{
  /* Unsigned, so a huge *wc can never yield a negative page index */
  uint page= *wc >> 8;
  if (page < 256 && uni_plane[page])
    *wc= uni_plane[page][*wc & 0xFF].toupper;
}
......@@ -1116,7 +1181,7 @@ my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
static inline void
my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
{
int page= *wc >> 8;
uint page= *wc >> 8;
if (page < 256)
{
if (uni_plane[page])
......@@ -1135,16 +1200,18 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
size_t dstlen __attribute__((unused)))
{
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
(res= my_utf16_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
{
my_toupper_utf16(uni_plane, &wc);
if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
break;
src+= res;
}
......@@ -1157,14 +1224,12 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2)
{
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int res;
const uchar *e= s+slen;
const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
while (e > s + 1 && e[-1] == ' ' && e[-2] == '\0')
e-= 2;
while ((s < e) && (res= my_utf16_uni(cs, &wc, (uchar *)s, (uchar*)e)) > 0)
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{
my_tosort_utf16(uni_plane, &wc);
n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
......@@ -1182,16 +1247,18 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
size_t dstlen __attribute__((unused)))
{
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
(res= my_utf16_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
{
my_tolower_utf16(uni_plane, &wc);
if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
break;
src+= res;
}
......@@ -1207,14 +1274,15 @@ my_strnncoll_utf16(CHARSET_INFO *cs,
{
int s_res, t_res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen;
const uchar *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
while (s < se && t < te)
{
s_res= my_utf16_uni(cs, &s_wc, s, se);
t_res= my_utf16_uni(cs, &t_wc, t, te);
s_res= mb_wc(cs, &s_wc, s, se);
t_res= mb_wc(cs, &t_wc, t, te);
if (s_res <= 0 || t_res <= 0)
{
......@@ -1271,6 +1339,7 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
{
int res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen, *te= t + tlen;
MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
......@@ -1283,8 +1352,8 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
while (s < se && t < te)
{
int s_res= my_utf16_uni(cs, &s_wc, s, se);
int t_res= my_utf16_uni(cs, &t_wc, t, te);
int s_res= mb_wc(cs, &s_wc, s, se);
int t_res= mb_wc(cs, &t_wc, t, te);
if (s_res <= 0 || t_res <= 0)
{
......@@ -1324,7 +1393,7 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
for ( ; s < se; s+= s_res)
{
if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0)
{
DBUG_ASSERT(0);
return 0;
......@@ -1338,22 +1407,11 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
/*
  Return the byte length of the character starting at "b".

  @param cs  Character set; its mb_wc() converter does the decoding, so the
             same function can be shared by every handler that installs it.
  @param b   Start of the character.
  @param e   End of the input buffer.

  @return    The positive length reported by mb_wc(), or 0 when mb_wc()
             returns a non-positive result.
*/
static uint
my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e)
{
  my_wc_t wc;
  int res= cs->cset->mb_wc(cs, &wc, (const uchar *) b, (const uchar *) e);
  return (uint) (res > 0 ? res : 0);
}
......@@ -1361,6 +1419,7 @@ static uint
my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
uint c __attribute__((unused)))
{
DBUG_ASSERT(0);
return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
}
......@@ -1449,13 +1508,14 @@ my_strnncoll_utf16_bin(CHARSET_INFO *cs,
{
int s_res,t_res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se=s+slen;
const uchar *te=t+tlen;
while ( s < se && t < te )
{
s_res= my_utf16_uni(cs,&s_wc, s, se);
t_res= my_utf16_uni(cs,&t_wc, t, te);
s_res= mb_wc(cs, &s_wc, s, se);
t_res= mb_wc(cs, &t_wc, t, te);
if (s_res <= 0 || t_res <= 0)
{
......@@ -1482,6 +1542,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
{
int res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen, *te= t + tlen;
DBUG_ASSERT((slen % 2) == 0);
......@@ -1493,8 +1554,8 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
while (s < se && t < te)
{
int s_res= my_utf16_uni(cs, &s_wc, s, se);
int t_res= my_utf16_uni(cs, &t_wc, t, te);
int s_res= mb_wc(cs, &s_wc, s, se);
int t_res= mb_wc(cs, &t_wc, t, te);
if (s_res <= 0 || t_res <= 0)
{
......@@ -1531,7 +1592,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
for ( ; s < se; s+= s_res)
{
if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
if ((s_res= mb_wc(cs, &s_wc, s, se)) < 0)
{
DBUG_ASSERT(0);
return 0;
......@@ -1545,17 +1606,11 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
static void
my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
my_hash_sort_utf16_bin(CHARSET_INFO *cs,
const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
key+= len;
while (key > pos + 1 && key[-1] == ' ' && key[-2] == '\0')
key-= 2;
for (; pos < (uchar*) key ; pos++)
const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len);
for ( ; pos < end ; pos++)
{
nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
((uint)*pos)) + (nr1[0] << 8);
......@@ -1664,7 +1719,7 @@ struct charset_info_st my_charset_utf16_general_ci=
struct charset_info_st my_charset_utf16_bin=
{
55,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16", /* cs name */
"utf16_bin", /* name */
"UTF-16 Unicode", /* comment */
......@@ -1693,6 +1748,176 @@ struct charset_info_st my_charset_utf16_bin=
&my_collation_utf16_bin_handler
};
/*
  Decode one character from a little-endian UTF-16 byte sequence.

  @param cs   Unused.
  @param pwc  Receives the decoded code point. On a surrogate-related
              failure it holds the first code unit that was read.
  @param s    Start of the input.
  @param e    End of the input.

  @return 2 or 4 (bytes consumed) on success,
          MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 when the input is too short,
          MY_CS_ILSEQ on a lone low surrogate or a broken surrogate pair.
*/
static int
my_utf16le_uni(const CHARSET_INFO *cs __attribute__((unused)),
               my_wc_t *pwc, const uchar *s, const uchar *e)
{
  my_wc_t hi, lo;

  if (s + 2 > e)
    return MY_CS_TOOSMALL2;

  hi= uint2korr(s);
  *pwc= hi;

  /* Anything outside D800..DFFF is a complete character: */
  if (!(hi >= MY_UTF16_SURROGATE_HIGH_FIRST &&
        hi <= MY_UTF16_SURROGATE_LOW_LAST))
    return 2;                     /* [0000-D7FF,E000-FFFF] */

  /* [DC00-DFFF] Low surrogate part without a preceding high part */
  if (hi >= MY_UTF16_SURROGATE_LOW_FIRST)
    return MY_CS_ILSEQ;

  /* A high surrogate needs a second code unit */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  lo= uint2korr(s + 2);
  if (!(lo >= MY_UTF16_SURROGATE_LOW_FIRST &&
        lo <= MY_UTF16_SURROGATE_LOW_LAST))
    return MY_CS_ILSEQ;           /* Second unit is not a low surrogate */

  /* Ten payload bits from each half, offset by 0x10000 */
  *pwc= 0x10000 + (((hi & 0x3FF) << 10) | (lo & 0x3FF));
  return 4;
}
/*
  Encode one code point as little-endian UTF-16.

  @param cs  Unused.
  @param wc  Code point to encode.
  @param s   Output position.
  @param e   End of the output buffer.

  @return 2 or 4 (bytes written) on success,
          MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 when the output does not fit,
          MY_CS_ILUNI for surrogate code points and values above 0x10FFFF.
*/
static int
my_uni_utf16le(const CHARSET_INFO *cs __attribute__((unused)),
               my_wc_t wc, uchar *s, uchar *e)
{
  uint32 hi_unit, lo_unit;
  int in_surrogate_range= (wc >= MY_UTF16_SURROGATE_HIGH_FIRST &&
                           wc <= MY_UTF16_SURROGATE_LOW_LAST);

  if (!in_surrogate_range && wc <= 0xFFFF)
  {
    /* [0000-D7FF,E000-FFFF]: a single code unit */
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    int2store(s, wc);
    return 2;
  }

  /* [D800-DFFF,10FFFF+] cannot be encoded */
  if (in_surrogate_range || wc > 0x10FFFF)
    return MY_CS_ILUNI;

  /* [010000-10FFFF]: a surrogate pair */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  wc-= 0x10000;
  hi_unit= (0xD800 | ((wc >> 10) & 0x3FF));
  lo_unit= (0xDC00 | (wc & 0x3FF));
  /* High surrogate first, each code unit stored low byte first */
  int2store(s, hi_unit);
  int2store(s + 2, lo_unit);
  return 4;
}
/*
  Return the length of a little-endian UTF-16 string with trailing
  spaces stripped.

  @param cs      Unused.
  @param ptr     Start of the string.
  @param length  Length of the string in bytes.

  @return        Length in bytes up to, but not including, the trailing
                 run of 2-byte ' ' code units.
*/
static size_t
my_lengthsp_utf16le(const CHARSET_INFO *cs __attribute__((unused)),
                    const char *ptr, size_t length)
{
  size_t len= length;

  /* Drop one 2-byte space at a time from the tail */
  while (len >= 2 && uint2korr(ptr + len - 2) == ' ')
    len-= 2;
  return len;
}
/*
  Character set handler for utf16le.

  Only three entries are specific to the little-endian byte order:
  my_lengthsp_utf16le, my_utf16le_uni (mb_wc) and my_uni_utf16le (wc_mb).
  All other entries are the my_*_utf16, my_*_mb2 and my_*_mb2_or_mb4
  routines.
*/
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
NULL, /* init */
my_ismbchar_utf16,
my_mbcharlen_utf16,
my_numchars_utf16,
my_charpos_utf16,
my_well_formed_len_utf16,
my_lengthsp_utf16le,
my_numcells_mb,
my_utf16le_uni, /* mb_wc */
my_uni_utf16le, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb2_or_mb4,
my_casedn_str_mb2_or_mb4,
my_caseup_utf16,
my_casedn_utf16,
my_snprintf_mb2,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_mb2,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
my_strntoull_mb2_or_mb4,
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2
};
/*
  Compiled-in definition of the utf16le_general_ci collation (number 56).

  Combines the utf16le character set handler with the utf16 general_ci
  collation handler and the default Unicode case-info table.
  Characters take between 2 (mbminlen) and 4 (mbmaxlen) bytes.
*/
struct charset_info_st my_charset_utf16le_general_ci=
{
56,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16le", /* cs name */
"utf16le_general_ci",/* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf16le_handler,
&my_collation_utf16_general_ci_handler
};
/*
  Compiled-in definition of the utf16le_bin collation (number 62).

  Combines the utf16le character set handler with the utf16 binary
  collation handler. Unlike utf16le_general_ci it carries MY_CS_BINSORT
  and not MY_CS_PRIMARY.
*/
struct charset_info_st my_charset_utf16le_bin=
{
62,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16le", /* cs name */
"utf16le_bin", /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf16le_handler,
&my_collation_utf16_bin_handler
};
#endif /* HAVE_CHARSET_utf16 */
......@@ -1727,7 +1952,7 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
/*
  Replace *wc with its lower-case counterpart.

  @param uni_plane  Case-info table indexed by the high byte of the code
                    point; an entry may be NULL.
  @param wc         In/out code point.

  Code points whose page is 256 or above, or whose page has no table,
  are left unchanged.
*/
static inline void
my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
{
  /* Unsigned, so a huge *wc can never yield a negative page index */
  uint page= *wc >> 8;
  if (page < 256 && uni_plane[page])
    *wc= uni_plane[page][*wc & 0xFF].tolower;
}
......@@ -1736,7 +1961,7 @@ my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
/*
  Replace *wc with its upper-case counterpart.

  @param uni_plane  Case-info table indexed by the high byte of the code
                    point; an entry may be NULL.
  @param wc         In/out code point.

  Code points whose page is 256 or above, or whose page has no table,
  are left unchanged.
*/
static inline void
my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
{
  /* Unsigned, so a huge *wc can never yield a negative page index */
  uint page= *wc >> 8;
  if (page < 256 && uni_plane[page])
    *wc= uni_plane[page][*wc & 0xFF].toupper;
}
......@@ -1745,7 +1970,7 @@ my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
static inline void
my_tosort_utf32(MY_UNICASE_INFO *const* uni_plane, my_wc_t *wc)
{
int page= *wc >> 8;
uint page= *wc >> 8;
if (page < 256)
{
if (uni_plane[page])
......@@ -2216,7 +2441,7 @@ my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
goto end4;
k= k * 10 + c;
s+= 2;
s+= 4;
*endptr= (char*) s;
/* number string should have ended here */
......@@ -2589,7 +2814,7 @@ struct charset_info_st my_charset_utf32_general_ci=
struct charset_info_st my_charset_utf32_bin=
{
61,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf32", /* cs name */
"utf32_bin", /* name */
"UTF-32 Unicode", /* comment */
......@@ -2708,6 +2933,35 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
}
/*
  Lower-case a UCS-2 code point in place.

  The high byte of *wc selects a row of "uni_plane"; when that row is
  present, the low byte selects the entry whose "tolower" value replaces
  *wc. The row index is only checked by a debug assertion.
*/
static inline void
my_tolower_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
{
  uint hi= *wc >> 8;
  MY_UNICASE_INFO *row;
  DBUG_ASSERT(hi < 256);
  row= uni_plane[hi];
  if (row)
    *wc= row[*wc & 0xFF].tolower;
}
/*
  Upper-case a UCS-2 code point in place.

  The high byte of *wc selects a row of "uni_plane"; when that row is
  present, the low byte selects the entry whose "toupper" value replaces
  *wc. The row index is only checked by a debug assertion.
*/
static inline void
my_toupper_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
{
  uint hi= *wc >> 8;
  MY_UNICASE_INFO *row;
  DBUG_ASSERT(hi < 256);
  row= uni_plane[hi];
  if (row)
    *wc= row[*wc & 0xFF].toupper;
}
/*
  Map a UCS-2 code point to its sort weight in place.

  The high byte of *wc selects a row of "uni_plane"; when that row is
  present, the low byte selects the entry whose "sort" value replaces
  *wc. The row index is only checked by a debug assertion.
*/
static inline void
my_tosort_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
{
  uint hi= *wc >> 8;
  MY_UNICASE_INFO *row;
  DBUG_ASSERT(hi < 256);
  row= uni_plane[hi];
  if (row)
    *wc= row[*wc & 0xFF].sort;
}
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
......@@ -2721,8 +2975,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
while ((src < srcend) &&
(res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
my_toupper_ucs2(uni_plane, &wc);
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
break;
src+= res;
......@@ -2744,8 +2997,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
my_tosort_ucs2(uni_plane, &wc);
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
......@@ -2768,8 +3020,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
while ((src < srcend) &&
(res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
my_tolower_ucs2(uni_plane, &wc);
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
break;
src+= res;
......@@ -2778,6 +3029,18 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
}
/*
  Fill a buffer with repetitions of one UCS-2 character.

  @param cs    Unused.
  @param s     Destination buffer.
  @param l     Size of the destination buffer in bytes.
  @param fill  Code point to store; must not exceed 0xFFFF.

  Each character is written most significant byte first. A trailing odd
  byte, if any, is left untouched.
*/
static void
my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
             char *s, size_t l, int fill)
{
  uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */
  DBUG_ASSERT(fill <= 0xFFFF);
  /*
    Count down the remaining length instead of computing "s + l - 2",
    which would point before the buffer when l < 2.
  */
  for ( ; l >= 2; s+= 2, l-= 2)
    int2store(s, tmp); /* store little-endian */
}
static int my_strnncoll_ucs2(CHARSET_INFO *cs,
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
......@@ -2791,7 +3054,6 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
while ( s < se && t < te )
{
int plane;
s_res=my_ucs2_uni(cs,&s_wc, s, se);
t_res=my_ucs2_uni(cs,&t_wc, t, te);
......@@ -2801,10 +3063,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
return ((int)s[0]-(int)t[0]);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
my_tosort_ucs2(uni_plane, &s_wc);
my_tosort_ucs2(uni_plane, &t_wc);
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
......@@ -3115,7 +3376,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_snprintf_mb2,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_mb2,
my_fill_ucs2,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment