BUG#22393: Very wrong E(#rows(ref(const)) for key with skewed distribution

- Check if we have E(#rows) for 'range' access on the smaller interval on the same index. If yes, adjust the estimate.

BUG#22393: Very wrong E(#rows(ref(const)) for key with skewed distribution
- Check if we have E(#rows) for 'range' access on the smaller interval on the same index. If yes, adjust the estimate.
13901802 · sergefp@mysql.com · 94028c61 · 13901802 · 13901802 · 13901802
Commit 13901802 authored Sep 18, 2006 by sergefp@mysql.com
Show whitespace changes
Inline Side-by-side

Showing with 64 additions and 0 deletions

mysql-test/r/range.result mysql-test/r/range.result +14 -0

mysql-test/t/range.test mysql-test/t/range.test +27 -0

sql/sql_select.cc sql/sql_select.cc +23 -0

No files found.
--- a/mysql-test/r/range.result
+++ b/mysql-test/r/range.result
@@ -896,3 +896,17 @@ EXPLAIN SELECT * FROM t1 WHERE 0 NOT BETWEEN b AND c;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index_merge	idx1,idx2	idx1,idx2	4,4	NULL	4	Using sort_union(idx1,idx2); Using where
 DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, filler char(100));
+insert into t2 select A.a + 10 * (B.a + 10 * C.a), 10, 'filler' from t1 A,
+t1 B, t1 C where A.a < 5;
+insert into t2 select 1000, b, 'filler' from t2;
+alter table t2 add index (a,b);
+select 'In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)' Z;
+Z
+In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)
+explain select * from t2 where a=1000 and b<11;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	5	const	502	Using where
+drop table t1, t2;
--- a/mysql-test/t/range.test
+++ b/mysql-test/t/range.test
@@ -711,3 +711,30 @@ DROP TABLE t1;


 # End of 5.0 tests
+
+# BUG#22393 fix: Adjust 'ref' estimate if we have 'range' estimate for
+#                a smaller scan interval
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t2 (a int, b int, filler char(100));
+insert into t2 select A.a + 10 * (B.a + 10 * C.a), 10, 'filler' from t1 A,
+t1 B, t1 C where A.a < 5;
+
+insert into t2 select 1000, b, 'filler' from t2;
+alter table t2 add index (a,b);
+# t2 values 
+#  ( 1  , 10, 'filler')
+#  ( 2  , 10, 'filler')
+#  ( 3  , 10, 'filler')
+#  (... , 10, 'filler')
+#   ...
+#  (1000, 10, 'filler') - 500 times
+
+# 500 rows, 1 row
+
+select 'In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)' Z;
+explain select * from t2 where a=1000 and b<11;
+
+drop table t1, t2;
+
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -3711,7 +3711,30 @@ best_access_path(JOIN      *join,
            {
              /* Check if we have statistic about the distribution */
              if ((records= keyinfo->rec_per_key[max_key_part-1]))
+              {
+                /* 
+                  Fix for the case where the index statistics is too
+                  optimistic: If 
+                  (1) We're considering ref(const) and there is quick select
+                      on the same index, 
+                  (2) and that quick select uses more keyparts (i.e. it will
+                      scan equal/smaller interval then this ref(const))
+                  (3) and E(#rows) for quick select is higher then our
+                      estimate,
+                  Then 
+                    We'll use E(#rows) from quick select.
+
+                  Q: Why do we choose to use 'ref'? Won't quick select be
+                  cheaper in some cases ?
+                  TODO: figure this out and adjust the plan choice if needed.
+                */
+                if (!found_ref && table->quick_keys.is_set(key) &&    // (1)
+                    table->quick_key_parts[key] > max_key_part &&     // (2)
+                    records < (double)table->quick_rows[key])         // (3)
+                  records= (double)table->quick_rows[key];
+
                tmp= records;
+              }
              else
              {
                /*