Commit baae7a97 authored by unknown's avatar unknown

BUG#18068: SELECT DISTINCT (with duplicates and covering index)

When converting DISTINCT to GROUP BY where the columns are from the covering
index and they are quoted twice in the SELECT list the optimizer is creating
improper processing sequence. This is because of the fact that the columns
of the covering index are not recognized as such and treated as non-index
columns.

Generally speaking duplicate columns can safely be removed from the GROUP
BY/DISTINCT list because this will not add or remove new rows in the
resulting set. Duplicates can be removed even if they are not consecutive
(as is the case for ORDER BY, where the duplicate columns can be removed
only if they are consecutive).

So we can safely transform "SELECT DISTINCT a,a FROM ... ORDER BY a" to
"SELECT a,a FROM ... GROUP BY a ORDER BY a" instead of 
"SELECT a,a FROM .. GROUP BY a,a ORDER BY a". We can even transform 
"SELECT DISTINCT a,b,a FROM ... ORDER BY a,b" to
"SELECT a,b,a FROM ... GROUP BY a,b ORDER BY a,b".

The fix to this bug consists of checking for duplicate columns in the SELECT
list when constructing the GROUP BY list in transforming DISTINCT to GROUP
BY and skipping the ones that are already in.


mysql-test/r/distinct.result:
  test case for the bug without loose index scan
mysql-test/r/group_min_max.result:
  test case for the bug
mysql-test/t/distinct.test:
  test case for the bug without loose index scan
mysql-test/t/group_min_max.test:
  test case for the bug
sql/sql_select.cc:
  duplicates check and removal
parent 33417297
......@@ -533,3 +533,15 @@ select count(distinct concat(x,y)) from t1;
count(distinct concat(x,y))
2
drop table t1;
CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));
INSERT INTO t1 VALUES (1, 101);
INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;
EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL PRIMARY 8 NULL 16 Using where; Using index
SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
a a
DROP TABLE t1;
......@@ -2116,3 +2116,25 @@ COUNT(DISTINCT a)
1
DROP TABLE t1;
DROP PROCEDURE a;
CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));
INSERT INTO t1 (a) VALUES
(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL PRIMARY 66 NULL 12 Using index for group-by
SELECT DISTINCT a,a FROM t1 ORDER BY a;
a a
CENTRAL CENTRAL
EASTERN EASTERN
GREATER LONDON GREATER LONDON
NORTH CENTRAL NORTH CENTRAL
NORTH EAST NORTH EAST
NORTH WEST NORTH WEST
SCOTLAND SCOTLAND
SOUTH EAST SOUTH EAST
SOUTH WEST SOUTH WEST
WESTERN WESTERN
DROP TABLE t1;
......@@ -382,3 +382,19 @@ INSERT INTO t1 VALUES
select count(distinct x,y) from t1;
select count(distinct concat(x,y)) from t1;
drop table t1;
#
# Bug #18068: SELECT DISTINCT
#
CREATE TABLE t1 (a INT, b INT, PRIMARY KEY (a,b));
INSERT INTO t1 VALUES (1, 101);
INSERT INTO t1 SELECT a + 1, a + 101 FROM t1;
INSERT INTO t1 SELECT a + 2, a + 102 FROM t1;
INSERT INTO t1 SELECT a + 4, a + 104 FROM t1;
INSERT INTO t1 SELECT a + 8, a + 108 FROM t1;
EXPLAIN SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
SELECT DISTINCT a,a FROM t1 WHERE b < 12 ORDER BY a;
DROP TABLE t1;
......@@ -782,3 +782,19 @@ SELECT COUNT(DISTINCT a) FROM t1 WHERE a=0;
DROP TABLE t1;
DROP PROCEDURE a;
#
# Bug #18068: SELECT DISTINCT
#
CREATE TABLE t1 (a varchar(64) NOT NULL default '', PRIMARY KEY(a));
INSERT INTO t1 (a) VALUES
(''), ('CENTRAL'), ('EASTERN'), ('GREATER LONDON'),
('NORTH CENTRAL'), ('NORTH EAST'), ('NORTH WEST'), ('SCOTLAND'),
('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN');
EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a;
SELECT DISTINCT a,a FROM t1 ORDER BY a;
DROP TABLE t1;
......@@ -12698,6 +12698,17 @@ create_distinct_group(THD *thd, Item **ref_pointer_array,
{
if (!item->const_item() && !item->with_sum_func && !item->marker)
{
/*
Don't put duplicate columns from the SELECT list into the
GROUP BY list.
*/
ORDER *ord_iter;
for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
if ((*ord_iter->item)->eq(item, 1))
break;
if (ord_iter)
continue;
ORDER *ord=(ORDER*) thd->calloc(sizeof(ORDER));
if (!ord)
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment