Bug#20471 LIKE search fails with indexed utf8 char column

The main problem was already fixed by Igor under terms of 16674.
Adding some additional minor fixes and tests.
parent 2eacb14d
......@@ -108,6 +108,8 @@ enum my_lex_states
struct charset_info_st;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_collation_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
......@@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
......@@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct charset_info_st
{
uint number;
......
......@@ -1124,6 +1124,81 @@ check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
before_delete_general_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
after_delete_general_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
before_delete_unicode_ci
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
after_delete_unicode_ci
ペテルグル
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
before_delete_bin
ペテルグル
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
after_delete_bin
ペテルグル
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_general_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
gci1
さしすせそかきくけこあいうえお
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
gci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
uci1
さしすせそかきくけこあいうえお
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
uci2
あいうえおかきくけこさしすせそ
drop table t1;
set names utf8;
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
bin1
さしすせそかきくけこあいうえお
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
bin2
あいうえおかきくけこさしすせそ
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
......
......@@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
check table t1;
drop table t1;
#
# Bug#20471 LIKE search fails with indexed utf8 char column
#
set names utf8;
create table t1 (s1 char(5) character set utf8);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
drop table t1;
set names utf8;
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
insert into t1 values
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
create index it1 on t1 (s1);
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
delete from t1 where s1 = 'Y';
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
drop table t1;
# additional tests from duplicate bug#20744 MySQL return no result
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_general_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_unicode_ci;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
set names utf8;
--disable_warnings
create table t1 (a varchar(30) not null primary key)
engine=innodb default character set utf8 collate utf8_bin;
--enable_warnings
insert into t1 values ('あいうえおかきくけこさしすせそ');
insert into t1 values ('さしすせそかきくけこあいうえお');
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
drop table t1;
#
# Bug#14896: Comparison with a key in a partial index over mb chararacter field
#
......
......@@ -33,7 +33,7 @@ typedef struct charset_info_st
uint strxfrm_multiply;
uint mbminlen;
uint mbmaxlen;
char max_sort_char; /* For LIKE optimization */
uint16 max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll;
......@@ -134,7 +134,15 @@ Misc fields
mbmaxlen - maximum multibyte sequence length.
1 for 8bit charsets. Can be also 2 or 3.
max_sort_char - for LIKE range
in case of 8bit character sets - native code
of maximum character (max_str pad byte);
in case of UTF8 and UCS2 - Unicode code of the maximum
possible character (usually U+FFFF). This code is
converted to multibyte representation (usually 0xEFBFBF)
and then used as a pad sequence for max_str.
in case of other multibyte character sets -
max_str pad byte (usually 0xFF).
MY_CHARSET_HANDLER
==================
......
......@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
/*
Write max key: create a buffer with multibyte
Write max key:
- for non-Unicode character sets:
just set to 255.
- for Unicode character set (utf-8):
create a buffer with multibyte
representation of the max_sort_char character,
and copy it into max_str in a loop.
*/
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{
char buf[10];
char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
(uchar*) buf + sizeof(buf));
char buflen;
if (!(cs->state & MY_CS_UNICODE))
{
bfill(str, end - str, 255);
return;
}
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
(uchar*) buf + sizeof(buf));
DBUG_ASSERT(buflen > 0);
do
{
......@@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
my_strnncoll_mb_bin,
my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin,
my_like_range_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
......
......@@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin=
1, /* mbminlen */
3, /* mbmaxlen */
0, /* min_sort_char */
255, /* max_sort_char */
0xFFFF, /* max_sort_char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler,
&my_collation_mb_bin_handler
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment