BUG#18068: SELECT DISTINCT (with duplicates and covering index)

When converting DISTINCT to GROUP BY where the columns are from the covering index and they are quoted twice in the SELECT list the optimizer is creating improper processing sequence. This is because of the fact that the columns of the covering index are not recognized as such and treated as non-index columns. Generally speaking duplicate columns can safely be removed from the GROUP BY/DISTINCT list because this will not add or remove new rows in the resulting set. Duplicates can be removed even if they are not consecutive (as is the case for ORDER BY, where the duplicate columns can be removed only if they are consecutive). So we can safely transform "SELECT DISTINCT a,a FROM ... ORDER BY a" to "SELECT a,a FROM ... GROUP BY a ORDER BY a" instead of "SELECT a,a FROM .. GROUP BY a,a ORDER BY a". We can even transform "SELECT DISTINCT a,b,a FROM ... ORDER BY a,b" to "SELECT a,b,a FROM ... GROUP BY a,b ORDER BY a,b". The fix to this bug consists of checking for duplicate columns in the SELECT list when constructing the GROUP BY list in transforming DISTINCT to GROUP BY and skipping the ones that are already in.

BUG#18068: SELECT DISTINCT (with duplicates and covering index)
When converting DISTINCT to GROUP BY where the columns are from the covering index and they are quoted twice in the SELECT list the optimizer is creating improper processing sequence. This is because of the fact that the columns of the covering index are not recognized as such and treated as non-index columns. Generally speaking duplicate columns can safely be removed from the GROUP BY/DISTINCT list because this will not add or remove new rows in the resulting set. Duplicates can be removed even if they are not consecutive (as is the case for ORDER BY, where the duplicate columns can be removed only if they are consecutive). So we can safely transform "SELECT DISTINCT a,a FROM ... ORDER BY a" to "SELECT a,a FROM ... GROUP BY a ORDER BY a" instead of "SELECT a,a FROM .. GROUP BY a,a ORDER BY a". We can even transform "SELECT DISTINCT a,b,a FROM ... ORDER BY a,b" to "SELECT a,b,a FROM ... GROUP BY a,b ORDER BY a,b". The fix to this bug consists of checking for duplicate columns in the SELECT list when constructing the GROUP BY list in transforming DISTINCT to GROUP BY and skipping the ones that are already in.
7bae0de3 · gkodinov@mysql.com · f0341ac4 · 7bae0de3 · 7bae0de3 · 7bae0de3
Commit 7bae0de3 authored May 09, 2006 by gkodinov@mysql.com
5 changed files
--- a/mysql-test/r/distinct.result
+++ b/mysql-test/r/distinct.result
@@ -533,3 +533,15 @@ select count(distinct concat(x,y)) from t1;
 count(distinct concat(x,y))
 2
 drop table t1;
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));
+INSERT INTO t1 VALUES (1, 101);
+INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
+INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
+INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
+INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;
+EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	PRIMARY	8	NULL	16	Using where; Using index
+SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
+a	a
+DROP TABLE t1;
--- a/mysql-test/r/group_min_max.result
+++ b/mysql-test/r/group_min_max.result
@@ -2116,3 +2116,25 @@ COUNT(DISTINCT a)
 1
 DROP TABLE t1;
 DROP PROCEDURE a;
+CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));
+INSERT INTO t1 (a) VALUES 
+(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	PRIMARY	66	NULL	12	Using index for group-by
+SELECT DISTINCT a,a FROM t1 ORDER BY a;
+a	a
+	
+CENTRAL	CENTRAL
+EASTERN	EASTERN
+GREATER LONDON	GREATER LONDON
+NORTH CENTRAL	NORTH CENTRAL
+NORTH EAST	NORTH EAST
+NORTH WEST	NORTH WEST
+SCOTLAND	SCOTLAND
+SOUTH EAST	SOUTH EAST
+SOUTH WEST	SOUTH WEST
+WESTERN	WESTERN
+DROP TABLE t1;
--- a/mysql-test/t/distinct.test
+++ b/mysql-test/t/distinct.test
@@ -382,3 +382,19 @@ INSERT INTO t1 VALUES
 select count(distinct x,y) from t1;
 select count(distinct concat(x,y)) from t1;
 drop table t1;
+
+#
+# Bug #18068: SELECT DISTINCT
+#
+CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));
+
+INSERT INTO t1 VALUES (1, 101);
+INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
+INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
+INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
+INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;
+
+EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
+SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
+
+DROP TABLE t1;
--- a/mysql-test/t/group_min_max.test
+++ b/mysql-test/t/group_min_max.test
@@ -782,3 +782,19 @@ SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;

 DROP TABLE t1;
 DROP PROCEDURE a;
+
+#
+# Bug #18068: SELECT DISTINCT
+#
+
+CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));
+
+INSERT INTO t1 (a) VALUES 
+  (''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
+  ('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
+  ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
+
+EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;  
+SELECT DISTINCT a,a FROM t1 ORDER BY a;  
+
+DROP TABLE t1;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -12698,6 +12698,17 @@ create_distinct_group(THD *thd, Item **ref_pointer_array,
  {
    if (!item->const_item() && !item->with_sum_func && !item->marker)
    {
+      /* 
+        Don't put duplicate columns from the SELECT list into the 
+        GROUP BY list.
+      */
+      ORDER *ord_iter;
+      for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
+        if ((*ord_iter->item)->eq(item, 1))
+          break;
+      if (ord_iter)
+        continue;
+      
      ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
      if (!ord)
 	return 0;