Bug#23451 GROUP_CONCAT truncates a multibyte utf8 character

  
  Problem: GROUP_CONCAT on a multi-byte column can truncate
  in the middle of a multibyte character when applying
  group_concat_max_len limit. It produces an invalid
  multi-byte character in the result string.
  
The second, easier version - reusing old "warning_for_row" flag,
instead of introducing of "result_is_full" - which was
added in the previous commit.
parent 31abddb5
......@@ -599,12 +599,72 @@ count(distinct (f1+1))
1
3
drop table t1;
set names utf8;
create table t1
(
x text character set utf8 not null,
y integer not null
);
insert into t1 values (repeat('a', 1022), 0), (repeat(_utf8 0xc3b7, 4), 0);
set group_concat_max_len= 1022 + 10;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1032 1031 1027 aaaaaaa,÷÷÷÷ C3B7C3B7C3B7
set group_concat_max_len= 1022 + 9;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1031 1031 1027 aaaaaaa,÷÷÷÷ C3B7C3B7C3B7
set group_concat_max_len= 1022 + 8;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1030 1029 1026 aaaaaaaa,÷÷÷ C3B7C3B7C3B7
set group_concat_max_len= 1022 + 7;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1029 1029 1026 aaaaaaaa,÷÷÷ C3B7C3B7C3B7
set group_concat_max_len= 1022 + 6;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1028 1027 1025 aaaaaaaaa,÷÷ 612CC3B7C3B7
set group_concat_max_len= 1022 + 5;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1027 1027 1025 aaaaaaaaa,÷÷ 612CC3B7C3B7
set group_concat_max_len= 1022 + 4;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1026 1025 1024 aaaaaaaaaa,÷ 6161612CC3B7
set group_concat_max_len= 1022 + 3;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1025 1025 1024 aaaaaaaaaa,÷ 6161612CC3B7
set group_concat_max_len= 1022 + 2;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1024 1023 1023 aaaaaaaaaaa, 61616161612C
set group_concat_max_len= 1022 + 1;
select @x:=group_concat(x) from t1 group by y;
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
@@group_concat_max_len length(@x) char_length(@x) right(@x,12) right(HEX(@x),12)
1023 1023 1023 aaaaaaaaaaa, 61616161612C
drop table t1;
set group_concat_max_len=1024;
set names latin1;
create table t1 (f1 int unsigned, f2 varchar(255));
insert into t1 values (1,repeat('a',255)),(2,repeat('b',255));
select f2,group_concat(f1) from t1 group by f2;
Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr
def test t1 t1 f2 f2 253 255 255 Y 0 0 8
def group_concat(f1) 252 400 1 Y 128 0 63
def group_concat(f1) 252 1024 1 Y 128 0 63
f2 group_concat(f1)
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb 2
......
......@@ -390,6 +390,30 @@ select f1, group_concat(f1+1) from t1 group by f1 with rollup;
select count(distinct (f1+1)) from t1 group by f1 with rollup;
drop table t1;
#
# Bug#23451 GROUP_CONCAT truncates a multibyte utf8 character
#
set names utf8;
create table t1
(
x text character set utf8 not null,
y integer not null
);
insert into t1 values (repeat('a', 1022), 0), (repeat(_utf8 0xc3b7, 4), 0);
let $1= 10;
while ($1)
{
eval set group_concat_max_len= 1022 + $1;
--disable_result_log
select @x:=group_concat(x) from t1 group by y;
--enable_result_log
select @@group_concat_max_len, length(@x), char_length(@x), right(@x,12), right(HEX(@x),12);
dec $1;
}
drop table t1;
set group_concat_max_len=1024;
set names latin1;
#
# Bug#14169 type of group_concat() result changed to blob if tmp_table was used
#
......
......@@ -1664,6 +1664,7 @@ int dump_leaf_key(byte* key, uint32 count __attribute__((unused)),
{
char buff[MAX_FIELD_WIDTH];
String tmp((char *)&buff,sizeof(buff),default_charset_info), tmp2;
uint old_length= item->result.length();
if (item->no_appended)
item->no_appended= FALSE;
......@@ -1702,8 +1703,22 @@ int dump_leaf_key(byte* key, uint32 count __attribute__((unused)),
/* stop if length of result more than group_concat_max_len */
if (item->result.length() > item->group_concat_max_len)
{
int well_formed_error;
CHARSET_INFO *cs= item->collation.collation;
const char *ptr= item->result.ptr();
uint add_length;
/*
It's ok to use item->result.length() as the fourth argument
as this is never used to limit the length of the data.
Cut is done with the third argument.
*/
add_length= cs->cset->well_formed_len(cs,
ptr + old_length,
ptr + item->group_concat_max_len,
item->result.length(),
&well_formed_error);
item->result.length(old_length + add_length);
item->count_cut_values++;
item->result.length(item->group_concat_max_len);
item->warning_for_row= TRUE;
return 1;
}
......@@ -1893,8 +1908,7 @@ bool Item_func_group_concat::add()
we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
instead of doing tree traverse later.
*/
if (result.length() <= group_concat_max_len &&
!warning_for_row &&
if (!warning_for_row &&
(!tree_mode || (el->count == 1 && distinct && !arg_count_order)))
dump_leaf_key(table->record[0], 1, this);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment