Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column

with null values

For queries containing GROUP_CONCAT(DISTINCT fields ORDER BY fields), there 
was a limitation that the DISTINCT fields had to be the same as ORDER BY 
fields, owing to the fact that one single sorted tree was used for keeping 
track of tuples, ordering and uniqueness. Fixed by introducing a second 
structure to handle uniqueness so that the original structure has only to 
order the result.
parent 7f67efcc
...@@ -271,7 +271,7 @@ group_concat(distinct s1 order by s2) ...@@ -271,7 +271,7 @@ group_concat(distinct s1 order by s2)
c,b,a c,b,a
select group_concat(distinct s1 order by s2) from t1; select group_concat(distinct s1 order by s2) from t1;
group_concat(distinct s1 order by s2) group_concat(distinct s1 order by s2)
c,b,a,c c,b,a
drop table t1; drop table t1;
create table t1 (a int, c int); create table t1 (a int, c int);
insert into t1 values (1, 2), (2, 3), (2, 4), (3, 5); insert into t1 values (1, 2), (2, 3), (2, 4), (3, 5);
...@@ -819,4 +819,65 @@ id group_concat(b.name) ...@@ -819,4 +819,65 @@ id group_concat(b.name)
1 ra,ra 1 ra,ra
2 ra,ra 2 ra,ra
drop table t1; drop table t1;
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
GROUP_CONCAT(DISTINCT a ORDER BY b)
1,2
SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
GROUP_CONCAT(DISTINCT a ORDER BY b DESC)
2,1
SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
GROUP_CONCAT(DISTINCT a)
1,2
SELECT GROUP_CONCAT(DISTINCT a + 1 ORDER BY 3 - b) FROM t1;
GROUP_CONCAT(DISTINCT a + 1 ORDER BY 3 - b)
3,2
SELECT GROUP_CONCAT(DISTINCT a + 1 ORDER BY b) FROM t1;
GROUP_CONCAT(DISTINCT a + 1 ORDER BY b)
2,3
SELECT GROUP_CONCAT(a ORDER BY 3 - b) FROM t1;
GROUP_CONCAT(a ORDER BY 3 - b)
2,2,1
CREATE TABLE t2 (a INT, b INT, c INT, d INT);
INSERT INTO t2 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t2;
GROUP_CONCAT(DISTINCT a, b ORDER BY c, d)
11,21,12
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t2;
GROUP_CONCAT(DISTINCT a, b ORDER BY d, c)
11,12,21
CREATE TABLE t3 (a INT, b INT, c INT);
INSERT INTO t3 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t3;
GROUP_CONCAT(DISTINCT a, b ORDER BY b, c)
11,21,32
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t3;
GROUP_CONCAT(DISTINCT a, b ORDER BY c, b)
11,32,21
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY a, b) FROM t1;
GROUP_CONCAT(DISTINCT a, b ORDER BY a, b)
11,22,23
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY a, b) FROM t1;
GROUP_CONCAT(DISTINCT b, a ORDER BY a, b)
11,22,32
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, a) FROM t1;
GROUP_CONCAT(DISTINCT a, b ORDER BY b, a)
11,22,23
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY a, b) FROM t1;
GROUP_CONCAT(DISTINCT b, a ORDER BY a, b)
11,22,32
SELECT GROUP_CONCAT(DISTINCT a ORDER BY a, b) FROM t1;
GROUP_CONCAT(DISTINCT a ORDER BY a, b)
1,2
SELECT GROUP_CONCAT(DISTINCT b ORDER BY b, a) FROM t1;
GROUP_CONCAT(DISTINCT b ORDER BY b, a)
1,2,3
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY a) FROM t1;
GROUP_CONCAT(DISTINCT a, b ORDER BY a)
11,23,22
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY b) FROM t1;
GROUP_CONCAT(DISTINCT b, a ORDER BY b)
11,22,32
DROP TABLE t1, t2, t3;
End of 5.0 tests End of 5.0 tests
...@@ -562,4 +562,47 @@ insert into t1 (id, name) values (2, " ...@@ -562,4 +562,47 @@ insert into t1 (id, name) values (2, "
select b.id, group_concat(b.name) from t1 a, t1 b group by b.id; select b.id, group_concat(b.name) from t1 a, t1 b group by b.id;
drop table t1; drop table t1;
#
# Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column
# with null values
#'
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a + 1 ORDER BY 3 - b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a + 1 ORDER BY b) FROM t1;
SELECT GROUP_CONCAT(a ORDER BY 3 - b) FROM t1;
CREATE TABLE t2 (a INT, b INT, c INT, d INT);
# There is one duplicate in the expression list: 1,10
# There is one duplicate in ORDER BY list, but that shouldnt matter: 1,10
INSERT INTO t2 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t2;
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t2;
CREATE TABLE t3 (a INT, b INT, c INT);
INSERT INTO t3 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t3;
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t3;
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY a, b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY a, b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, a) FROM t1;
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY a, b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a ORDER BY a, b) FROM t1;
SELECT GROUP_CONCAT(DISTINCT b ORDER BY b, a) FROM t1;
SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY a) FROM t1;
SELECT GROUP_CONCAT(DISTINCT b, a ORDER BY b) FROM t1;
DROP TABLE t1, t2, t3;
--echo End of 5.0 tests --echo End of 5.0 tests
...@@ -2831,44 +2831,51 @@ String *Item_sum_udf_str::val_str(String *str) ...@@ -2831,44 +2831,51 @@ String *Item_sum_udf_str::val_str(String *str)
concat of values from "group by" operation concat of values from "group by" operation
BUGS BUGS
DISTINCT and ORDER BY only works if ORDER BY uses all fields and only fields
in expression list
Blobs doesn't work with DISTINCT or ORDER BY Blobs doesn't work with DISTINCT or ORDER BY
*****************************************************************************/ *****************************************************************************/
/*
function of sort for syntax:
GROUP_CONCAT(DISTINCT expr,...) /**
Compares the values for fields in expr list of GROUP_CONCAT.
@note
GROUP_CONCAT([DISTINCT] expr [,expr ...]
[ORDER BY {unsigned_integer | col_name | expr}
[ASC | DESC] [,col_name ...]]
[SEPARATOR str_val])
@return
@retval -1 : key1 < key2
@retval 0 : key1 = key2
@retval 1 : key1 > key2
*/ */
int group_concat_key_cmp_with_distinct(void* arg, byte* key1, int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
byte* key2) const void* key2)
{ {
Item_func_group_concat* grp_item= (Item_func_group_concat*)arg; Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
TABLE *table= grp_item->table; TABLE *table= item_func->table;
Item **field_item, **end;
for (field_item= grp_item->args, end= field_item + grp_item->arg_count_field; for (uint i= 0; i < item_func->arg_count_field; i++)
field_item < end;
field_item++)
{ {
Item *item= item_func->args[i];
/*
If field_item is a const item then either get_tp_table_field returns 0
or it is an item over a const table.
*/
if (item->const_item())
continue;
/* /*
We have to use get_tmp_table_field() instead of We have to use get_tmp_table_field() instead of
real_item()->get_tmp_table_field() because we want the field in real_item()->get_tmp_table_field() because we want the field in
the temporary table, not the original field the temporary table, not the original field
*/ */
Field *field= (*field_item)->get_tmp_table_field(); Field *field= item->get_tmp_table_field();
/* int res;
If field_item is a const item then either get_tp_table_field returns 0 uint offset= field->offset()-table->s->null_bytes;
or it is an item over a const table. if((res= field->cmp((char*)key1 + offset, (char*)key2 + offset)))
*/ return res;
if (field && !(*field_item)->const_item())
{
int res;
uint offset= field->offset() - table->s->null_bytes;
if ((res= field->cmp((char *) key1 + offset, (char *) key2 + offset)))
return res;
}
} }
return 0; return 0;
} }
...@@ -2879,7 +2886,8 @@ int group_concat_key_cmp_with_distinct(void* arg, byte* key1, ...@@ -2879,7 +2886,8 @@ int group_concat_key_cmp_with_distinct(void* arg, byte* key1,
GROUP_CONCAT(expr,... ORDER BY col,... ) GROUP_CONCAT(expr,... ORDER BY col,... )
*/ */
int group_concat_key_cmp_with_order(void* arg, byte* key1, byte* key2) int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2)
{ {
Item_func_group_concat* grp_item= (Item_func_group_concat*) arg; Item_func_group_concat* grp_item= (Item_func_group_concat*) arg;
ORDER **order_item, **end; ORDER **order_item, **end;
...@@ -2917,25 +2925,6 @@ int group_concat_key_cmp_with_order(void* arg, byte* key1, byte* key2) ...@@ -2917,25 +2925,6 @@ int group_concat_key_cmp_with_order(void* arg, byte* key1, byte* key2)
} }
/*
function of sort for syntax:
GROUP_CONCAT(DISTINCT expr,... ORDER BY col,... )
BUG:
This doesn't work in the case when the order by contains data that
is not part of the field list because tree-insert will not notice
the duplicated values when inserting things sorted by ORDER BY
*/
int group_concat_key_cmp_with_distinct_and_order(void* arg,byte* key1,
byte* key2)
{
if (!group_concat_key_cmp_with_distinct(arg,key1,key2))
return 0;
return(group_concat_key_cmp_with_order(arg,key1,key2));
}
/* /*
Append data from current leaf to item->result Append data from current leaf to item->result
*/ */
...@@ -3020,7 +3009,7 @@ Item_func_group_concat(Name_resolution_context *context_arg, ...@@ -3020,7 +3009,7 @@ Item_func_group_concat(Name_resolution_context *context_arg,
bool distinct_arg, List<Item> *select_list, bool distinct_arg, List<Item> *select_list,
SQL_LIST *order_list, String *separator_arg) SQL_LIST *order_list, String *separator_arg)
:tmp_table_param(0), warning(0), :tmp_table_param(0), warning(0),
separator(separator_arg), tree(0), table(0), separator(separator_arg), tree(0), unique_filter(NULL), table(0),
order(0), context(context_arg), order(0), context(context_arg),
arg_count_order(order_list ? order_list->elements : 0), arg_count_order(order_list ? order_list->elements : 0),
arg_count_field(select_list->elements), arg_count_field(select_list->elements),
...@@ -3075,6 +3064,7 @@ Item_func_group_concat::Item_func_group_concat(THD *thd, ...@@ -3075,6 +3064,7 @@ Item_func_group_concat::Item_func_group_concat(THD *thd,
warning(item->warning), warning(item->warning),
separator(item->separator), separator(item->separator),
tree(item->tree), tree(item->tree),
unique_filter(item->unique_filter),
table(item->table), table(item->table),
order(item->order), order(item->order),
context(item->context), context(item->context),
...@@ -3125,6 +3115,11 @@ void Item_func_group_concat::cleanup() ...@@ -3125,6 +3115,11 @@ void Item_func_group_concat::cleanup()
delete_tree(tree); delete_tree(tree);
tree= 0; tree= 0;
} }
if (unique_filter)
{
delete unique_filter;
unique_filter= NULL;
}
if (warning) if (warning)
{ {
char warn_buff[MYSQL_ERRMSG_SIZE]; char warn_buff[MYSQL_ERRMSG_SIZE];
...@@ -3154,6 +3149,8 @@ void Item_func_group_concat::clear() ...@@ -3154,6 +3149,8 @@ void Item_func_group_concat::clear()
no_appended= TRUE; no_appended= TRUE;
if (tree) if (tree)
reset_tree(tree); reset_tree(tree);
if (distinct)
unique_filter->reset();
/* No need to reset the table as we never call write_row */ /* No need to reset the table as we never call write_row */
} }
...@@ -3177,9 +3174,19 @@ bool Item_func_group_concat::add() ...@@ -3177,9 +3174,19 @@ bool Item_func_group_concat::add()
} }
null_value= FALSE; null_value= FALSE;
bool row_eligible= TRUE;
if (distinct)
{
/* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree();
unique_filter->unique_add(table->record[0] + table->s->null_bytes);
if (count == unique_filter->elements_in_tree())
row_eligible= FALSE;
}
TREE_ELEMENT *el= 0; // Only for safety TREE_ELEMENT *el= 0; // Only for safety
if (tree) if (row_eligible && tree)
el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0, el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
tree->custom_arg); tree->custom_arg);
/* /*
...@@ -3187,7 +3194,7 @@ bool Item_func_group_concat::add() ...@@ -3187,7 +3194,7 @@ bool Item_func_group_concat::add()
we can dump the row here in case of GROUP_CONCAT(DISTINCT...) we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
instead of doing tree traverse later. instead of doing tree traverse later.
*/ */
if (!warning_for_row && if (row_eligible && !warning_for_row &&
(!tree || (el->count == 1 && distinct && !arg_count_order))) (!tree || (el->count == 1 && distinct && !arg_count_order)))
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
...@@ -3263,7 +3270,6 @@ bool Item_func_group_concat::setup(THD *thd) ...@@ -3263,7 +3270,6 @@ bool Item_func_group_concat::setup(THD *thd)
{ {
List<Item> list; List<Item> list;
SELECT_LEX *select_lex= thd->lex->current_select; SELECT_LEX *select_lex= thd->lex->current_select;
qsort_cmp2 compare_key;
DBUG_ENTER("Item_func_group_concat::setup"); DBUG_ENTER("Item_func_group_concat::setup");
/* /*
...@@ -3334,38 +3340,33 @@ bool Item_func_group_concat::setup(THD *thd) ...@@ -3334,38 +3340,33 @@ bool Item_func_group_concat::setup(THD *thd)
table->file->extra(HA_EXTRA_NO_ROWS); table->file->extra(HA_EXTRA_NO_ROWS);
table->no_rows= 1; table->no_rows= 1;
/*
Need sorting or uniqueness: init tree and choose a function to sort.
Don't reserve space for NULLs: if any of gconcat arguments is NULL,
the row is not added to the result.
*/
uint tree_key_length= table->s->reclength - table->s->null_bytes;
if (distinct || arg_count_order) if (arg_count_order)
{ {
/*
Need sorting: init tree and choose a function to sort.
Don't reserve space for NULLs: if any of gconcat arguments is NULL,
the row is not added to the result.
*/
uint tree_key_length= table->s->reclength - table->s->null_bytes;
tree= &tree_base; tree= &tree_base;
if (arg_count_order)
{
if (distinct)
compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct_and_order;
else
compare_key= (qsort_cmp2) group_concat_key_cmp_with_order;
}
else
{
compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct;
}
/* /*
Create a tree for sorting. The tree is used to sort and to remove Create a tree for sorting. The tree is used to sort (according to the
duplicate values (according to the syntax of this function). If there syntax of this function). If there is no ORDER BY clause, we don't
is no DISTINCT or ORDER BY clauses, we don't create this tree. create this tree.
*/ */
init_tree(tree, (uint) min(thd->variables.max_heap_table_size, init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0, thd->variables.sortbuff_size/16), 0,
tree_key_length, compare_key, 0, NULL, (void*) this); tree_key_length,
group_concat_key_cmp_with_order , 0, NULL, (void*) this);
} }
if (distinct)
unique_filter= new Unique(group_concat_key_cmp_with_distinct,
(void*)this,
tree_key_length,
thd->variables.max_heap_table_size);
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
} }
...@@ -3435,3 +3436,10 @@ void Item_func_group_concat::print(String *str) ...@@ -3435,3 +3436,10 @@ void Item_func_group_concat::print(String *str)
str->append(*separator); str->append(*separator);
str->append(STRING_WITH_LEN("\')")); str->append(STRING_WITH_LEN("\')"));
} }
Item_func_group_concat::~Item_func_group_concat()
{
if (unique_filter)
delete unique_filter;
}
...@@ -1173,11 +1173,22 @@ class Item_func_group_concat : public Item_sum ...@@ -1173,11 +1173,22 @@ class Item_func_group_concat : public Item_sum
String *separator; String *separator;
TREE tree_base; TREE tree_base;
TREE *tree; TREE *tree;
/**
If DISTINCT is used with this GROUP_CONCAT, this member is used to filter
out duplicates.
@see Item_func_group_concat::setup
@see Item_func_group_concat::add
@see Item_func_group_concat::clear
*/
Unique *unique_filter;
TABLE *table; TABLE *table;
ORDER **order; ORDER **order;
Name_resolution_context *context; Name_resolution_context *context;
uint arg_count_order; // total count of ORDER BY items /** The number of ORDER BY items. */
uint arg_count_field; // count of arguments uint arg_count_order;
/** The number of selected items, aka the expr list. */
uint arg_count_field;
uint count_cut_values; uint count_cut_values;
bool distinct; bool distinct;
bool warning_for_row; bool warning_for_row;
...@@ -1190,13 +1201,10 @@ class Item_func_group_concat : public Item_sum ...@@ -1190,13 +1201,10 @@ class Item_func_group_concat : public Item_sum
*/ */
Item_func_group_concat *original; Item_func_group_concat *original;
friend int group_concat_key_cmp_with_distinct(void* arg, byte* key1, friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
byte* key2); const void* key2);
friend int group_concat_key_cmp_with_order(void* arg, byte* key1, friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
byte* key2); const void* key2);
friend int group_concat_key_cmp_with_distinct_and_order(void* arg,
byte* key1,
byte* key2);
friend int dump_leaf_key(byte* key, friend int dump_leaf_key(byte* key,
element_count count __attribute__((unused)), element_count count __attribute__((unused)),
Item_func_group_concat *group_concat_item); Item_func_group_concat *group_concat_item);
...@@ -1207,7 +1215,7 @@ public: ...@@ -1207,7 +1215,7 @@ public:
SQL_LIST *is_order, String *is_separator); SQL_LIST *is_order, String *is_separator);
Item_func_group_concat(THD *thd, Item_func_group_concat *item); Item_func_group_concat(THD *thd, Item_func_group_concat *item);
~Item_func_group_concat() {} ~Item_func_group_concat();
void cleanup(); void cleanup();
enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;} enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment