Commit 1faafac4 authored by bar@bar.mysql.r18.ru's avatar bar@bar.mysql.r18.ru

Bug 1181 fix. LIKE didn't work with UCS2 character set.

parent 3ff17e7d
...@@ -13,6 +13,133 @@ hex(word) ...@@ -13,6 +13,133 @@ hex(word)
0420 0420
2004 2004
DROP TABLE t1; DROP TABLE t1;
SET NAMES koi8r;
SET character_set_connection=ucs2;
create table t1 (a varchar(10) character set ucs2, key(a));
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 21 NULL 1 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (''),(''),(''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
SELECT * FROM t1 WHERE a LIKE '%%';
a
SELECT * FROM t1 WHERE a LIKE '%%';
a
SELECT * FROM t1 WHERE a LIKE '%';
a
SELECT * FROM t1 WHERE a LIKE '%' COLLATE ucs2_bin;
a
DROP TABLE t1;
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
word
cat
SELECT * FROM t1 WHERE word LIKE "ca_";
word
cat
SELECT * FROM t1 WHERE word LIKE "cat";
word
cat
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
word
cat
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
word
cat
DROP TABLE t1;
SET NAMES latin1;
CREATE TABLE t1 ( CREATE TABLE t1 (
word VARCHAR(64), word VARCHAR(64),
bar INT(11) default 0, bar INT(11) default 0,
......
...@@ -29,6 +29,51 @@ INSERT INTO t1 VALUES (X'042000200020'), (X'200400200020'); ...@@ -29,6 +29,51 @@ INSERT INTO t1 VALUES (X'042000200020'), (X'200400200020');
SELECT hex(word) FROM t1 ORDER BY word; SELECT hex(word) FROM t1 ORDER BY word;
DROP TABLE t1; DROP TABLE t1;
######################################################
#
# Test of like
#
SET NAMES koi8r;
SET character_set_connection=ucs2;
create table t1 (a varchar(10) character set ucs2, key(a));
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
explain select * from t1 where a like concat('abc','%');
select * from t1 where a like "abc%";
select * from t1 where a like concat("abc","%");
select * from t1 where a like "ABC%";
select * from t1 where a like "test%";
select * from t1 where a like "te_t";
select * from t1 where a like "%a%";
select * from t1 where a like "%abcd%";
select * from t1 where a like "%abc\d%";
drop table t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (''),(''),(''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
INSERT INTO t1 VALUES (''),(''),(''),('');
SELECT * FROM t1 WHERE a LIKE '%%';
SELECT * FROM t1 WHERE a LIKE '%%';
SELECT * FROM t1 WHERE a LIKE '%';
SELECT * FROM t1 WHERE a LIKE '%' COLLATE ucs2_bin;
DROP TABLE t1;
#
# Bug 1181
#
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
DROP TABLE t1;
###################################################### ######################################################
...@@ -53,6 +98,7 @@ DROP TABLE t1; ...@@ -53,6 +98,7 @@ DROP TABLE t1;
# are not part of the index sorted on, it does a filesort, which fails. # are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results. # Using a straight index yields correct results.
SET NAMES latin1;
# #
# Two fields, index # Two fields, index
......
...@@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = { ...@@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = {
}; };
static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) , static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e) my_wc_t * pwc, const uchar *s, const uchar *e)
{ {
if (s+2 > e) /* Need 2 characters */ if (s+2 > e) /* Need 2 characters */
...@@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)), ...@@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
return (uint) (end-ptr); return (uint) (end-ptr);
} }
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
static
int my_wildcmp_ucs2(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_UNICASE_INFO **weights)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan, plane;
while (wildstr != wildend)
{
while (1)
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
if (scan <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t)escape)
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
if (scan <= 0)
return 1;
wildstr+= scan;
}
if (w_wc == (my_wc_t)w_many)
{
result= 1; /* Found an anchor char */
break;
}
scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
if (scan <=0)
return 1;
str+= scan;
if (w_wc == (my_wc_t)w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc != w_wc)
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
if (w_wc == (my_wc_t)w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
if (scan <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t)w_many)
continue;
if (w_wc == (my_wc_t)w_one)
{
scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
if (scan <=0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
if (scan <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t)escape)
{
scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
if (scan <= 0)
return 1;
wildstr+= scan;
}
do
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end);
if (scan <= 0)
return 1;
str+= scan;
if (weights)
{
plane=(s_wc>>8) & 0xFF;
s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
plane=(w_wc>>8) & 0xFF;
w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
}
if (s_wc == w_wc)
break;
}
if (str == str_end)
return -1;
result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
w_one,w_many,weights);
if (result <= 0)
return result;
} while (str != str_end && w_wc != (my_wc_t)w_many);
return -1;
}
}
return (str != str_end ? 1 : 0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler = static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
static
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,NULL);
}
static
int my_strnncoll_ucs2_bin(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
const uchar *se=s+slen;
const uchar *te=t+tlen;
while ( s < se && t < te )
{
s_res=my_ucs2_uni(cs,&s_wc, s, se);
t_res=my_ucs2_uni(cs,&t_wc, t, te);
if ( s_res <= 0 || t_res <= 0 )
{
/* Incorrect string, compare by char value */
return ((int)s[0]-(int)t[0]);
}
if ( s_wc != t_wc )
{
return ((int) s_wc) - ((int) t_wc);
}
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
}
static
int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
{
uint s_len=strlen(s);
uint t_len=strlen(t);
uint len = (s_len > t_len) ? s_len : t_len;
return my_strncasecmp_ucs2(cs, s, t, len);
}
static
int my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
{
if (dst != src)
memcpy(dst,src,srclen= min(dstlen,srclen));
return srclen;
}
static
void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
key+= len;
for (; pos < (uchar*) key ; pos++)
{
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
((uint)*pos)) + (nr1[0] << 8);
nr2[0]+=3;
}
}
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{ {
my_strnncoll_ucs2, my_strnncoll_ucs2,
my_strnncoll_ucs2, my_strnncoll_ucs2,
my_strnxfrm_ucs2, my_strnxfrm_ucs2,
my_like_range_simple, my_like_range_simple,
my_wildcmp_mb, my_wildcmp_ucs2_ci,
my_strcasecmp_ucs2, my_strcasecmp_ucs2,
my_instr_mb, my_instr_mb,
my_hash_sort_ucs2 my_hash_sort_ucs2
}; };
static MY_CHARSET_HANDLER my_charset_handler= static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
my_strnncoll_ucs2_bin,
my_strnncoll_ucs2_bin,
my_strnxfrm_ucs2_bin,
my_like_range_simple,
my_wildcmp_ucs2_bin,
my_strcasecmp_ucs2_bin,
my_instr_mb,
my_hash_sort_ucs2_bin
};
static MY_CHARSET_HANDLER my_charset_ucs2_handler=
{ {
my_ismbchar_ucs2, /* ismbchar */ my_ismbchar_ucs2, /* ismbchar */
my_mbcharlen_ucs2, /* mbcharlen */ my_mbcharlen_ucs2, /* mbcharlen */
...@@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci= ...@@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci=
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_ucs2_handler,
&my_collation_ci_handler &my_collation_ucs2_general_ci_handler
}; };
...@@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin= ...@@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin=
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_ucs2_handler,
&my_collation_mb_bin_handler &my_collation_ucs2_bin_handler
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment