Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and

ORDER BY computed col GROUP BY implies ORDER BY in the MySQL dialect of SQL. Therefore, when an index on the first table in the query is used, and that index satisfies ordering according to the GROUP BY clause, the query optimizer estimates the number of tuples that need to be read from this index. If there is a LIMIT clause, table statistics on tables following this 'sort table' are employed. There may be a separate ORDER BY clause however, which mandates reading the whole 'sort table' anyway. But the previous estimate was left untouched. Fixed by removing the estimate from EXPLAIN output if GROUP BY is used in conjunction with an ORDER BY clause that mandates using a temporary table.

Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and
ORDER BY computed col GROUP BY implies ORDER BY in the MySQL dialect of SQL. Therefore, when an index on the first table in the query is used, and that index satisfies ordering according to the GROUP BY clause, the query optimizer estimates the number of tuples that need to be read from this index. If there is a LIMIT clause, table statistics on tables following this 'sort table' are employed. There may be a separate ORDER BY clause however, which mandates reading the whole 'sort table' anyway. But the previous estimate was left untouched. Fixed by removing the estimate from EXPLAIN output if GROUP BY is used in conjunction with an ORDER BY clause that mandates using a temporary table.
3beeb5d0 · Martin Hansson · daa6d1f4 · 3beeb5d0 · 3beeb5d0 · 3beeb5d0
Commit 3beeb5d0 authored Sep 13, 2010 by Martin Hansson
Hide whitespace changes
Inline Side-by-side

Showing with 55 additions and 0 deletions

mysql-test/r/order_by.result mysql-test/r/order_by.result +21 -0

mysql-test/t/order_by.test mysql-test/t/order_by.test +25 -0

sql/sql_select.cc sql/sql_select.cc +9 -0

No files found.
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -1617,4 +1617,25 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	range	a	a	5	NULL	2	Using where; Using temporary; Using filesort
 1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
 DROP TABLE t1, t2;
+#
+# Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and
+# ORDER BY computed col
+#
+CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, KEY( a, b ) );
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t1 SELECT a + 5, b + 5 FROM t1;
+CREATE TABLE t2( a INT PRIMARY KEY, b INT );
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t2 SELECT a + 5, b + 5 FROM t2;
+EXPLAIN
+SELECT count(*) AS c, t1.a
+FROM t1 JOIN t2 ON t1.b = t2.a
+WHERE t2.b = 1
+GROUP BY t1.a
+ORDER by c
+LIMIT 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	8	NULL	10	Using index; Using temporary; Using filesort
+1	SIMPLE	t2	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using where
+DROP TABLE t1, t2;
 End of 5.1 tests
--- a/mysql-test/t/order_by.test
+++ b/mysql-test/t/order_by.test
@@ -1467,4 +1467,29 @@ SELECT * FROM t1 FORCE INDEX FOR JOIN (a), t2 WHERE t1.a < 2 ORDER BY t1.a;
 DROP TABLE t1, t2;
+--echo #
+--echo # Bug #50394: Regression in EXPLAIN with index scan, LIMIT, GROUP BY and
+--echo # ORDER BY computed col
+--echo #
+CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, KEY( a, b ) );
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t1 SELECT a + 5, b + 5 FROM t1;
+CREATE TABLE t2( a INT PRIMARY KEY, b INT );
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t2 SELECT a + 5, b + 5 FROM t2;
+EXPLAIN
+SELECT count(*) AS c, t1.a
+FROM t1 JOIN t2 ON t1.b = t2.a
+WHERE t2.b = 1
+GROUP BY t1.a
+ORDER by c
+LIMIT 2;
+DROP TABLE t1, t2;
 --echo End of 5.1 tests
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1486,6 +1486,15 @@ JOIN::optimize()
    }
    if (order)
    {
+      /*
+        Do we need a temporary table due to the ORDER BY not being equal to
+        the GROUP BY? The call to test_if_skip_sort_order above tests for the
+        GROUP BY clause only and hence is not valid in this case. So the
+        estimated number of rows to be read from the first table is not valid.
+        We clear it here so that it doesn't show up in EXPLAIN.
+       */
+      if (need_tmp && (select_options & SELECT_DESCRIBE) != 0)
+        join_tab[const_tables].limit= 0;
      /*
        Force using of tmp table if sorting by a SP or UDF function due to
        their expensive and probably non-deterministic nature.