Commit 2496e85b authored by bar@mysql.com's avatar bar@mysql.com

Bug#4521: unique key prefix interacts poorly with utf8.

Fix for binary collations for MyISAM and HEAP BTREE.
This patch also changes trailing spaces behaviour for
binary collations. Binary collations now have PAD 
characteristic too.
parent 6b90806a
...@@ -396,9 +396,18 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, ...@@ -396,9 +396,18 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
matched=prefix_len+left; matched=prefix_len+left;
for (my_flag=0;left;left--) if (sort_order)
if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) {
break; for (my_flag=0;left;left--)
if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++]))
break;
}
else
{
for (my_flag=0;left;left--)
if ((my_flag= (int) *vseg++ - (int) *k++))
break;
}
if (my_flag>0) /* mismatch */ if (my_flag>0) /* mismatch */
break; break;
......
...@@ -59,8 +59,10 @@ concat("-",a,"-",b,"-") ...@@ -59,8 +59,10 @@ concat("-",a,"-",b,"-")
-hello-hello- -hello-hello-
select concat("-",a,"-",b,"-") from t1 where b="hello "; select concat("-",a,"-",b,"-") from t1 where b="hello ";
concat("-",a,"-",b,"-") concat("-",a,"-",b,"-")
-hello-hello-
select concat("-",a,"-",b,"-") from t1 ignore index (b) where b="hello "; select concat("-",a,"-",b,"-") from t1 ignore index (b) where b="hello ";
concat("-",a,"-",b,"-") concat("-",a,"-",b,"-")
-hello-hello-
alter table t1 modify b tinytext not null, drop key b, add key (b(100)); alter table t1 modify b tinytext not null, drop key b, add key (b(100));
select concat("-",a,"-",b,"-") from t1; select concat("-",a,"-",b,"-") from t1;
concat("-",a,"-",b,"-") concat("-",a,"-",b,"-")
......
...@@ -397,3 +397,95 @@ select c as c_a from t1 where c='б'; ...@@ -397,3 +397,95 @@ select c as c_a from t1 where c='б';
c_a c_a
б б
drop table t1; drop table t1;
create table t1 (c varchar(30) character set utf8 collate utf8_bin, unique(c(10)));
insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z');
insert into t1 values ('aaaaaaaaaa');
insert into t1 values ('aaaaaaaaaaa');
ERROR 23000: Duplicate entry 'aaaaaaaaaaa' for key 1
insert into t1 values ('aaaaaaaaaaaa');
ERROR 23000: Duplicate entry 'aaaaaaaaaaaa' for key 1
insert into t1 values (repeat('b',20));
select c c1 from t1 where c='1';
c1
1
select c c2 from t1 where c='2';
c2
2
select c c3 from t1 where c='3';
c3
3
select c cx from t1 where c='x';
cx
x
select c cy from t1 where c='y';
cy
y
select c cz from t1 where c='z';
cz
z
select c ca10 from t1 where c='aaaaaaaaaa';
ca10
aaaaaaaaaa
select c cb20 from t1 where c=repeat('b',20);
cb20
bbbbbbbbbbbbbbbbbbbb
drop table t1;
create table t1 (c char(3) character set utf8 collate utf8_bin, unique (c(2)));
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a');
insert into t1 values ('aa');
insert into t1 values ('aaa');
ERROR 23000: Duplicate entry 'aaa' for key 1
insert into t1 values ('b');
insert into t1 values ('bb');
insert into t1 values ('bbb');
ERROR 23000: Duplicate entry 'bbb' for key 1
insert into t1 values ('а');
insert into t1 values ('аа');
insert into t1 values ('ааа');
ERROR 23000: Duplicate entry 'ааа' for key 1
insert into t1 values ('б');
insert into t1 values ('бб');
insert into t1 values ('ббб');
ERROR 23000: Duplicate entry 'ббб' for key 1
insert into t1 values ('ꪪ');
insert into t1 values ('ꪪꪪ');
insert into t1 values ('ꪪꪪꪪ');
ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1
drop table t1;
create table t1 (
c char(10) character set utf8 collate utf8_bin,
unique key a using btree (c(1))
) engine=heap;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(10) character set utf8 collate utf8_bin default NULL,
UNIQUE KEY `a` TYPE BTREE (`c`(1))
) ENGINE=HEAP DEFAULT CHARSET=latin1
insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f');
insert into t1 values ('aa');
ERROR 23000: Duplicate entry 'aa' for key 1
insert into t1 values ('aaa');
ERROR 23000: Duplicate entry 'aaa' for key 1
insert into t1 values ('б');
insert into t1 values ('бб');
ERROR 23000: Duplicate entry 'б' for key 1
insert into t1 values ('ббб');
ERROR 23000: Duplicate entry 'б' for key 1
select c as c_all from t1 order by c;
c_all
a
b
c
d
e
f
б
select c as c_a from t1 where c='a';
c_a
a
select c as c_a from t1 where c='б';
c_a
б
drop table t1;
...@@ -19,7 +19,7 @@ select 'a a' > 'a', 'a \0' < 'a'; ...@@ -19,7 +19,7 @@ select 'a a' > 'a', 'a \0' < 'a';
1 1 1 1
select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a'; select binary 'a a' > 'a', binary 'a \0' > 'a', binary 'a\0' > 'a';
binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a' binary 'a a' > 'a' binary 'a \0' > 'a' binary 'a\0' > 'a'
1 1 1 1 0 0
create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)); create table t1 (text1 varchar(32) not NULL, KEY key1 (text1));
insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); insert into t1 values ('teststring'), ('nothing'), ('teststring\t');
check table t1; check table t1;
......
...@@ -412,6 +412,7 @@ aaa. ...@@ -412,6 +412,7 @@ aaa.
aaa . aaa .
select concat(a,'.') from t1 where binary a='aaa'; select concat(a,'.') from t1 where binary a='aaa';
concat(a,'.') concat(a,'.')
aaa .
aaa. aaa.
update t1 set a='bbb' where a='aaa'; update t1 set a='bbb' where a='aaa';
select concat(a,'.') from t1; select concat(a,'.') from t1;
......
...@@ -189,7 +189,7 @@ drop table t2; ...@@ -189,7 +189,7 @@ drop table t2;
# #
# Bug 4521: unique key prefix interacts poorly with utf8 # Bug 4521: unique key prefix interacts poorly with utf8
# Check keys with prefix compression # MYISAM: keys with prefix compression, case insensitive collation.
# #
create table t1 (c varchar(30) character set utf8, unique(c(10))); create table t1 (c varchar(30) character set utf8, unique(c(10)));
insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z');
...@@ -211,7 +211,8 @@ drop table t1; ...@@ -211,7 +211,8 @@ drop table t1;
# #
# Bug 4521: unique key prefix interacts poorly with utf8 # Bug 4521: unique key prefix interacts poorly with utf8
# Check fixed length keys # MYISAM: fixed length keys, case insensitive collation
#
create table t1 (c char(3) character set utf8, unique (c(2))); create table t1 (c char(3) character set utf8, unique (c(2)));
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a'); insert into t1 values ('a');
...@@ -283,3 +284,104 @@ select c as c_all from t1 order by c; ...@@ -283,3 +284,104 @@ select c as c_all from t1 order by c;
select c as c_a from t1 where c='a'; select c as c_a from t1 where c='a';
select c as c_a from t1 where c='б'; select c as c_a from t1 where c='б';
drop table t1; drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# MYISAM: keys with prefix compression, binary collation.
#
create table t1 (c varchar(30) character set utf8 collate utf8_bin, unique(c(10)));
insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z');
insert into t1 values ('aaaaaaaaaa');
--error 1062
insert into t1 values ('aaaaaaaaaaa');
--error 1062
insert into t1 values ('aaaaaaaaaaaa');
insert into t1 values (repeat('b',20));
select c c1 from t1 where c='1';
select c c2 from t1 where c='2';
select c c3 from t1 where c='3';
select c cx from t1 where c='x';
select c cy from t1 where c='y';
select c cz from t1 where c='z';
select c ca10 from t1 where c='aaaaaaaaaa';
select c cb20 from t1 where c=repeat('b',20);
drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# MYISAM: fixed length keys, binary collation
#
create table t1 (c char(3) character set utf8 collate utf8_bin, unique (c(2)));
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a');
insert into t1 values ('aa');
--error 1062
insert into t1 values ('aaa');
insert into t1 values ('b');
insert into t1 values ('bb');
--error 1062
insert into t1 values ('bbb');
insert into t1 values ('а');
insert into t1 values ('аа');
--error 1062
insert into t1 values ('ааа');
insert into t1 values ('б');
insert into t1 values ('бб');
--error 1062
insert into t1 values ('ббб');
insert into t1 values ('ꪪ');
insert into t1 values ('ꪪꪪ');
--error 1062
insert into t1 values ('ꪪꪪꪪ');
drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# Check HEAP+HASH, binary collation
#
# This doesn't work correctly yet.
#
#create table t1 (
#c char(10) character set utf8 collate utf8_bin,
#unique key a using hash (c(1))
#) engine=heap;
#show create table t1;
#insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f');
#--error 1062
#insert into t1 values ('aa');
#--error 1062
#insert into t1 values ('aaa');
#insert into t1 values ('б');
#--error 1062
#insert into t1 values ('бб');
#--error 1062
#insert into t1 values ('ббб');
#select c as c_all from t1 order by c;
#select c as c_a from t1 where c='a';
#select c as c_a from t1 where c='б';
#drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# Check HEAP+BTREE, binary collation
#
create table t1 (
c char(10) character set utf8 collate utf8_bin,
unique key a using btree (c(1))
) engine=heap;
show create table t1;
insert into t1 values ('a'),('b'),('c'),('d'),('e'),('f');
--error 1062
insert into t1 values ('aa');
--error 1062
insert into t1 values ('aaa');
insert into t1 values ('б');
--error 1062
insert into t1 values ('бб');
--error 1062
insert into t1 values ('ббб');
select c as c_all from t1 order by c;
select c as c_a from t1 where c='a';
select c as c_a from t1 where c='б';
drop table t1;
...@@ -357,7 +357,7 @@ public: ...@@ -357,7 +357,7 @@ public:
uint size_of() const { return sizeof(*this); } uint size_of() const { return sizeof(*this); }
CHARSET_INFO *charset(void) const { return field_charset; } CHARSET_INFO *charset(void) const { return field_charset; }
void set_charset(CHARSET_INFO *charset) { field_charset=charset; } void set_charset(CHARSET_INFO *charset) { field_charset=charset; }
bool binary() const { return field_charset->state & MY_CS_BINSORT ? 1 : 0; } bool binary() const { return field_charset == &my_charset_bin; }
uint32 max_length() { return field_length; } uint32 max_length() { return field_length; }
friend class create_field; friend class create_field;
}; };
......
...@@ -357,9 +357,11 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const ...@@ -357,9 +357,11 @@ ulong ha_berkeley::index_flags(uint idx, uint part, bool all_parts) const
case HA_KEYTYPE_VARTEXT: case HA_KEYTYPE_VARTEXT:
/* /*
As BDB stores only one copy of equal strings, we can't use key read As BDB stores only one copy of equal strings, we can't use key read
on these on these. Binary collations do support key read though.
*/ */
flags&= ~HA_KEYREAD_ONLY; if (!(table->key_info[idx].key_part[i].field->charset()->state
& MY_CS_BINSORT))
flags&= ~HA_KEYREAD_ONLY;
break; break;
default: // Keep compiler happy default: // Keep compiler happy
break; break;
......
...@@ -303,10 +303,10 @@ int Arg_comparator::set_compare_func(Item_bool_func2 *item, Item_result type) ...@@ -303,10 +303,10 @@ int Arg_comparator::set_compare_func(Item_bool_func2 *item, Item_result type)
my_coll_agg_error((*a)->collation, (*b)->collation, owner->func_name()); my_coll_agg_error((*a)->collation, (*b)->collation, owner->func_name());
return 1; return 1;
} }
if (my_binary_compare(cmp_collation.collation)) if (cmp_collation.collation == &my_charset_bin)
{ {
/* /*
We are using binary collation, change to compare byte by byte, We are using BLOB/BINARY/VARBINARY, change to compare byte by byte,
without removing end space without removing end space
*/ */
if (func == &Arg_comparator::compare_string) if (func == &Arg_comparator::compare_string)
......
...@@ -68,11 +68,22 @@ static uchar bin_char_array[] = ...@@ -68,11 +68,22 @@ static uchar bin_char_array[] =
/*
  Byte-wise comparison of two binary strings.

  The common leading part (the shorter of the two lengths) is compared
  with memcmp(). If it differs, that result is returned. Otherwise the
  result is the length difference, where the length of 's' is clamped to
  the common length when 't_is_prefix' is set.
*/
static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
                               const uchar *s, uint slen,
                               const uchar *t, uint tlen,
                               my_bool t_is_prefix)
{
  uint common= min(slen, tlen);
  int diff= memcmp(s, t, common);

  if (diff)
    return diff;
  if (t_is_prefix)
    slen= common;                       /* ignore the part of 's' past 't' */
  return (int) (slen - tlen);
}
/* /*
Compare two strings. Result is sign(first_argument - second_argument) Compare two strings. Result is sign(first_argument - second_argument)
SYNOPSIS SYNOPSIS
my_strnncoll_binary() my_strnncollsp_binary()
cs Character set cs Character set
s String to compare s String to compare
slen Length of 's' slen Length of 's'
...@@ -80,8 +91,9 @@ static uchar bin_char_array[] = ...@@ -80,8 +91,9 @@ static uchar bin_char_array[] =
tlen Length of 't' tlen Length of 't'
NOTE NOTE
This is used also when comparing with end space removal, as end space This function is used for real binary strings, i.e. for
is significant for binary strings BLOB, BINARY(N) and VARBINARY(N).
It does not ignore trailing spaces.
RETURN RETURN
< 0 s < t < 0 s < t
...@@ -89,10 +101,18 @@ static uchar bin_char_array[] = ...@@ -89,10 +101,18 @@ static uchar bin_char_array[] =
> 0 s > t > 0 s > t
*/ */
static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen, const uchar *s, uint slen,
const uchar *t, uint tlen, const uchar *t, uint tlen)
my_bool t_is_prefix) {
return my_strnncoll_binary(cs,s,slen,t,tlen,0);
}
static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen,
my_bool t_is_prefix)
{ {
uint len=min(slen,tlen); uint len=min(slen,tlen);
int cmp= memcmp(s,t,len); int cmp= memcmp(s,t,len);
...@@ -100,11 +120,61 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)), ...@@ -100,11 +120,61 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
} }
static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), /*
const uchar *s, uint slen, Compare two strings. Result is sign(first_argument - second_argument)
const uchar *t, uint tlen)
SYNOPSIS
my_strnncollsp_8bit_bin()
cs Character set
s String to compare
slen Length of 's'
t String to compare
tlen Length of 't'
NOTE
This function is used for character strings with binary collations.
It ignores trailing spaces.
RETURN
< 0 s < t
0 s == t
> 0 s > t
*/
/*
  Compare two strings byte by byte, treating trailing spaces as
  insignificant.

  Returns < 0 if a < b, 0 if they are equal, > 0 if a > b.
*/
static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                   const uchar *a, uint a_length,
                                   const uchar *b, uint b_length)
{
  const uchar *end;
  uint length;

  /* Compare the part both strings have in common. */
  end= a + (length= min(a_length, b_length));
  while (a < end)
  {
    if (*a++ != *b++)
      return ((int) a[-1] - (int) b[-1]);
  }
  if (a_length != b_length)
  {
    int sign= 1;
    /*
      Check the first non-space character in the tail of the longer key.
      If it is < ' ', the longer key is smaller than the shorter one,
      otherwise it is bigger.
    */
    if (a_length < b_length)
    {
      /* Put the longer key in 'a'; the result must then be negated. */
      a_length= b_length;
      a= b;
      sign= -1;
    }
    for (end= a + a_length-length; a < end ; a++)
    {
      /*
        Negate by multiplication. XOR with -1 computes ~x (== -x - 1),
        which turns a difference of -1 into 0 and reports unequal
        strings as equal.
      */
      if (*a != ' ')
        return sign * ((int) *a - (int) ' ');
    }
  }
  return 0;
}
...@@ -342,6 +412,20 @@ skip: ...@@ -342,6 +412,20 @@ skip:
MY_COLLATION_HANDLER my_collation_8bit_bin_handler = MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
{
NULL, /* init */
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_bin,
my_strnxfrm_bin,
my_like_range_simple,
my_wildcmp_bin,
my_strcasecmp_bin,
my_instr_bin,
my_hash_sort_bin
};
static MY_COLLATION_HANDLER my_collation_binary_handler =
{ {
NULL, /* init */ NULL, /* init */
my_strnncoll_binary, my_strnncoll_binary,
...@@ -407,5 +491,5 @@ CHARSET_INFO my_charset_bin = ...@@ -407,5 +491,5 @@ CHARSET_INFO my_charset_bin =
0, /* min_sort_char */ 0, /* min_sort_char */
255, /* max_sort_char */ 255, /* max_sort_char */
&my_charset_handler, &my_charset_handler,
&my_collation_8bit_bin_handler &my_collation_binary_handler
}; };
...@@ -360,11 +360,62 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), ...@@ -360,11 +360,62 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen); return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
} }
/*
Compare two strings.
SYNOPSIS
my_strnncollsp_mb_bin()
cs Character set
s String to compare
slen Length of 's'
t String to compare
tlen Length of 't'
NOTE
This function is used for character strings with binary collations.
It ignores trailing spaces.
RETURN
A negative number if s < t
A positive number if s > t
0 if strings are equal
*/
/*
  Compare two strings byte by byte, treating trailing spaces as
  insignificant.

  Returns a negative number if a < b, a positive number if a > b,
  and 0 if the strings are equal.
*/
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
                                 const uchar *a, uint a_length,
                                 const uchar *b, uint b_length)
{
  const uchar *end;
  uint length;

  /* Compare the part both strings have in common. */
  end= a + (length= min(a_length, b_length));
  while (a < end)
  {
    if (*a++ != *b++)
      return ((int) a[-1] - (int) b[-1]);
  }
  if (a_length != b_length)
  {
    int sign= 1;
    /*
      Check the first non-space character in the tail of the longer key.
      If it is < ' ', the longer key is smaller than the shorter one,
      otherwise it is bigger.
    */
    if (a_length < b_length)
    {
      /* Put the longer key in 'a'; the result must then be negated. */
      a_length= b_length;
      a= b;
      sign= -1;
    }
    for (end= a + a_length-length; a < end ; a++)
    {
      /*
        Negate by multiplication. XOR with -1 computes ~x (== -x - 1),
        which turns a difference of -1 into 0 and reports unequal
        strings as equal.
      */
      if (*a != ' ')
        return sign * ((int) *a - (int) ' ');
    }
  }
  return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment