Commit 88680a99 authored by Neeraj Bisht

Bug#16691598 - ORDER BY LOWER(COLUMN) PRODUCES OUT-OF-ORDER RESULTS

Problem:-
We have created a table with UTF8_BIN collation.
When a query has an ORDER BY clause over a function call, the result
is returned in an incorrect order.
Note: the bug is not there in 5.5.

Analysis:
In 5.5, for UTF16_BIN, the min and max multi-byte lengths are 2 and 4
respectively. In make_sortkey(), for a 2-byte character we are
assuming that the resultant length will be 2 bytes per character. But when
we use my_strnxfrm_unicode_full_bin(), we store sorting weights using
3 bytes per character. This results in a truncated result.

The same thing happens for UTF8MB4, where we have a 1-byte min multi-byte
length and a 4-byte max multi-byte length. We will assume the resultant data
is 1 byte per character, which results in a truncated result.

Solution:-
Use strnxfrm (i.e. set the MY_CS_STRNXFRM macro in the collation state) for
the sort, in which the resultant length is not dependent on the source length.
parent e6949c24
...@@ -14,3 +14,12 @@ SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; ...@@ -14,3 +14,12 @@ SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
ALTER TABLE t1 ADD KEY(a); ALTER TABLE t1 ADD KEY(a);
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
--echo #
--echo # BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
--echo # OUT-OF-ORDER RESULTS
--echo #
CREATE TABLE t1 SELECT ('a a') as n;
INSERT INTO t1 VALUES('a b');
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
DROP TABLE t1;
...@@ -636,6 +636,21 @@ FF9D EFBE9D ...@@ -636,6 +636,21 @@ FF9D EFBE9D
D800DF84 F0908E84 D800DF84 F0908E84
DBC0DC00 F4808080 DBC0DC00 F4808080
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
#
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
# OUT-OF-ORDER RESULTS
#
CREATE TABLE t1 SELECT ('a a') as n;
INSERT INTO t1 VALUES('a b');
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
n
a a
a b
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
n
a b
a a
DROP TABLE t1;
select @@collation_connection; select @@collation_connection;
@@collation_connection @@collation_connection
utf16_bin utf16_bin
......
...@@ -635,6 +635,21 @@ HEX(a) HEX(CONVERT(a USING utf8mb4)) ...@@ -635,6 +635,21 @@ HEX(a) HEX(CONVERT(a USING utf8mb4))
00010384 F0908E84 00010384 F0908E84
00100000 F4808080 00100000 F4808080
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
#
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
# OUT-OF-ORDER RESULTS
#
CREATE TABLE t1 SELECT ('a a') as n;
INSERT INTO t1 VALUES('a b');
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
n
a a
a b
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
n
a b
a a
DROP TABLE t1;
select @@collation_connection; select @@collation_connection;
@@collation_connection @@collation_connection
utf32_bin utf32_bin
......
...@@ -1012,6 +1012,21 @@ EFBE9D EFBE9D ...@@ -1012,6 +1012,21 @@ EFBE9D EFBE9D
F0908E84 F0908E84 F0908E84 F0908E84
F4808080 F4808080 F4808080 F4808080
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
#
# BUG#16691598 - ORDER BY LOWER(COLUMN) PRODUCES
# OUT-OF-ORDER RESULTS
#
CREATE TABLE t1 SELECT ('a a') as n;
INSERT INTO t1 VALUES('a b');
SELECT * FROM t1 ORDER BY LOWER(n) ASC;
n
a a
a b
SELECT * FROM t1 ORDER BY LOWER(n) DESC;
n
a b
a a
DROP TABLE t1;
select @@collation_connection; select @@collation_connection;
@@collation_connection @@collation_connection
utf8mb4_bin utf8mb4_bin
......
...@@ -813,8 +813,6 @@ static void make_sortkey(register SORTPARAM *param, ...@@ -813,8 +813,6 @@ static void make_sortkey(register SORTPARAM *param,
{ {
CHARSET_INFO *cs=item->collation.collation; CHARSET_INFO *cs=item->collation.collation;
char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' '); char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
int diff;
uint sort_field_length;
if (maybe_null) if (maybe_null)
*to++=1; *to++=1;
...@@ -842,25 +840,13 @@ static void make_sortkey(register SORTPARAM *param, ...@@ -842,25 +840,13 @@ static void make_sortkey(register SORTPARAM *param,
break; break;
} }
length= res->length(); length= res->length();
sort_field_length= sort_field->length - sort_field->suffix_length;
diff=(int) (sort_field_length - length);
if (diff < 0)
{
diff=0;
length= sort_field_length;
}
if (sort_field->suffix_length)
{
/* Store length last in result_string */
store_length(to + sort_field_length, length,
sort_field->suffix_length);
}
if (sort_field->need_strxnfrm) if (sort_field->need_strxnfrm)
{ {
char *from=(char*) res->ptr(); char *from=(char*) res->ptr();
uint tmp_length; uint tmp_length;
if ((uchar*) from == to) if ((uchar*) from == to)
{ {
DBUG_ASSERT(sort_field->length >= length);
set_if_smaller(length,sort_field->length); set_if_smaller(length,sort_field->length);
memcpy(param->tmp_buffer,from,length); memcpy(param->tmp_buffer,from,length);
from=param->tmp_buffer; from=param->tmp_buffer;
...@@ -871,6 +857,22 @@ static void make_sortkey(register SORTPARAM *param, ...@@ -871,6 +857,22 @@ static void make_sortkey(register SORTPARAM *param,
} }
else else
{ {
uint diff;
uint sort_field_length= sort_field->length -
sort_field->suffix_length;
if (sort_field_length < length)
{
diff= 0;
length= sort_field_length;
}
else
diff= sort_field_length - length;
if (sort_field->suffix_length)
{
/* Store length last in result_string */
store_length(to + sort_field_length, length,
sort_field->suffix_length);
}
my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length); my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
cs->cset->fill(cs, (char *)to+length,diff,fill_char); cs->cset->fill(cs, (char *)to+length,diff,fill_char);
} }
......
...@@ -1664,7 +1664,7 @@ CHARSET_INFO my_charset_utf16_general_ci= ...@@ -1664,7 +1664,7 @@ CHARSET_INFO my_charset_utf16_general_ci=
CHARSET_INFO my_charset_utf16_bin= CHARSET_INFO my_charset_utf16_bin=
{ {
55,0,0, /* number */ 55,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
"utf16", /* cs name */ "utf16", /* cs name */
"utf16_bin", /* name */ "utf16_bin", /* name */
"UTF-16 Unicode", /* comment */ "UTF-16 Unicode", /* comment */
......
...@@ -5435,7 +5435,8 @@ CHARSET_INFO my_charset_utf8mb4_general_ci= ...@@ -5435,7 +5435,8 @@ CHARSET_INFO my_charset_utf8mb4_general_ci=
CHARSET_INFO my_charset_utf8mb4_bin= CHARSET_INFO my_charset_utf8mb4_bin=
{ {
46,0,0, /* number */ 46,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_UNICODE_SUPPLEMENT, /* state */ MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|
MY_CS_UNICODE_SUPPLEMENT, /* state */
MY_UTF8MB4, /* cs name */ MY_UTF8MB4, /* cs name */
MY_UTF8MB4_BIN, /* name */ MY_UTF8MB4_BIN, /* name */
"UTF-8 Unicode", /* comment */ "UTF-8 Unicode", /* comment */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment