Commit 13901802 authored by sergefp@mysql.com's avatar sergefp@mysql.com

BUG#22393: Very wrong E(#rows(ref(const)) for key with skewed distribution

- Check if we have E(#rows) for 'range' access on the smaller interval 
  on the same index. If yes, adjust the estimate.
parent 94028c61
......@@ -896,3 +896,17 @@ EXPLAIN SELECT * FROM t1 WHERE 0 NOT BETWEEN b AND c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index_merge idx1,idx2 idx1,idx2 4,4 NULL 4 Using sort_union(idx1,idx2); Using where
DROP TABLE t1;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, b int, filler char(100));
insert into t2 select A.a + 10 * (B.a + 10 * C.a), 10, 'filler' from t1 A,
t1 B, t1 C where A.a < 5;
insert into t2 select 1000, b, 'filler' from t2;
alter table t2 add index (a,b);
select 'In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)' Z;
Z
In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)
explain select * from t2 where a=1000 and b<11;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ref a a 5 const 502 Using where
drop table t1, t2;
......@@ -711,3 +711,30 @@ DROP TABLE t1;
# End of 5.0 tests
# BUG#22393 fix: Adjust 'ref' estimate if we have 'range' estimate for
# a smaller scan interval
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, b int, filler char(100));
insert into t2 select A.a + 10 * (B.a + 10 * C.a), 10, 'filler' from t1 A,
t1 B, t1 C where A.a < 5;
insert into t2 select 1000, b, 'filler' from t2;
alter table t2 add index (a,b);
# t2 values
# ( 1 , 10, 'filler')
# ( 2 , 10, 'filler')
# ( 3 , 10, 'filler')
# (... , 10, 'filler')
# ...
# (1000, 10, 'filler') - 500 times
# 500 rows, 1 row
select 'In following EXPLAIN the access method should be ref, #rows~=500 (and not 2)' Z;
explain select * from t2 where a=1000 and b<11;
drop table t1, t2;
......@@ -3711,7 +3711,30 @@ best_access_path(JOIN *join,
{
/* Check if we have statistic about the distribution */
if ((records= keyinfo->rec_per_key[max_key_part-1]))
{
/*
Fix for the case where the index statistics is too
optimistic: If
(1) We're considering ref(const) and there is quick select
on the same index,
(2) and that quick select uses more keyparts (i.e. it will
scan equal/smaller interval then this ref(const))
(3) and E(#rows) for quick select is higher then our
estimate,
Then
We'll use E(#rows) from quick select.
Q: Why do we choose to use 'ref'? Won't quick select be
cheaper in some cases ?
TODO: figure this out and adjust the plan choice if needed.
*/
if (!found_ref && table->quick_keys.is_set(key) && // (1)
table->quick_key_parts[key] > max_key_part && // (2)
records < (double)table->quick_rows[key]) // (3)
records= (double)table->quick_rows[key];
tmp= records;
}
else
{
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment