Commit 70952a55 authored by Sergey Petrunya's avatar Sergey Petrunya

MWL#90: Non-merged semi-joins

- Fix trivial bugs in post-GROUP-BY output cardinality calculations
- Add testcases
parent 524e6aad
drop table if exists t0, t1, t2, t3, t4;
set @save_optimizer_switch=@@optimizer_switch;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 as select * from t0;
# The following should use full scan on <subquery2> and it must scan 1 row:
explain select * from t0 where a in (select max(a) from t1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1
1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10
select * from t0 where a in (select max(a) from t1);
a
9
insert into t1 values (11);
select * from t0 where a in (select max(a) from t1);
a
delete from t1 where a=11;
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
a
9
delete from t0 where a is NULL;
delete from t1;
select * from t0 where a in (select max(a) from t1);
a
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
a
delete from t0 where a is NULL;
drop table t1;
create table t1 (a int, b int);
insert into t1 select a,a from t0;
create table t2 as select * from t1 where a<5;
create table t3 as select (A.a + 10*B.a) as a from t0 A, t0 B;
alter table t3 add primary key(a);
# The following should have do a full scan on <subquery2> and scan 5 rows
# (despite that subquery's join output estimate is 50 rows)
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5 Using where
1 PRIMARY t3 eq_ref PRIMARY PRIMARY 8 <subquery2>.max(t2.a) 1 Using where; Using index
2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using temporary; Using filesort
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
# Compare to this which really will have 50 record combinations:
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b, t1.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 50 Using where
1 PRIMARY t3 eq_ref PRIMARY PRIMARY 8 <subquery2>.max(t2.a) 1 Using where; Using index
2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using temporary; Using filesort
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
# Outer joins also work:
explain select * from t3
where a in (select max(t2.a) from t1 left join t2 on t1.a=t2.a group by t2.b, t1.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 50 Using where
1 PRIMARY t3 eq_ref PRIMARY PRIMARY 8 <subquery2>.max(t2.a) 1 Using where; Using index
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using temporary; Using filesort
2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using where
create table t4 (a int, b int, filler char(20), unique key(a,b));
insert into t4 select A.a + 10*B.a, A.a + 10*B.a, 'filler' from t0 A, t0 B;
explain select * from t0, t4 where
t4.b=t0.a and t4.a in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t0 ALL NULL NULL NULL NULL 10
1 PRIMARY t4 ALL a NULL NULL NULL 100 Using where; Using join buffer (flat, BNL join)
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 5 test.t4.a 1
2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using temporary; Using filesort
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
insert into t4 select 100 + (B.a *100 + A.a), 100 + (B.a*100 + A.a), 'filler' from t4 A, t0 B;
explain select * from t4 where
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5 Using where
1 PRIMARY <subquery3> ALL distinct_key NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t4 ref a a 10 <subquery2>.max(t2.a),<subquery3>.max(t2.a) 12
3 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using temporary; Using filesort
3 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
2 SUBQUERY t2 ALL NULL NULL NULL NULL 5 Using temporary; Using filesort
2 SUBQUERY t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
drop table t1,t2,t3,t4;
drop table t0;
#
# Tests for non-merged semi-joins
#
--disable_warnings
drop table if exists t0, t1, t2, t3, t4;
--enable_warnings
set @save_optimizer_switch=@@optimizer_switch;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
# Check the case of subquery having agggregates but not having grouping
create table t1 as select * from t0;
--echo # The following should use full scan on <subquery2> and it must scan 1 row:
explain select * from t0 where a in (select max(a) from t1);
select * from t0 where a in (select max(a) from t1);
# Ok, now check the trivial match/no-match/NULL on the left/NULL on the right cases
insert into t1 values (11);
select * from t0 where a in (select max(a) from t1);
delete from t1 where a=11;
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
delete from t0 where a is NULL;
delete from t1;
select * from t0 where a in (select max(a) from t1);
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
delete from t0 where a is NULL;
drop table t1;
#
# Try with join subqueries
#
create table t1 (a int, b int);
insert into t1 select a,a from t0; # 10 rows
create table t2 as select * from t1 where a<5; # 5 rows
create table t3 as select (A.a + 10*B.a) as a from t0 A, t0 B; # 100 rows
alter table t3 add primary key(a);
--echo # The following should have do a full scan on <subquery2> and scan 5 rows
--echo # (despite that subquery's join output estimate is 50 rows)
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b);
--echo # Compare to this which really will have 50 record combinations:
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b, t1.b);
--echo # Outer joins also work:
explain select * from t3
where a in (select max(t2.a) from t1 left join t2 on t1.a=t2.a group by t2.b, t1.b);
#
# Check if joins on the outer side also work
#
create table t4 (a int, b int, filler char(20), unique key(a,b));
insert into t4 select A.a + 10*B.a, A.a + 10*B.a, 'filler' from t0 A, t0 B; # 100 rows
explain select * from t0, t4 where
t4.b=t0.a and t4.a in (select max(t2.a) from t1, t2 group by t2.b);
insert into t4 select 100 + (B.a *100 + A.a), 100 + (B.a*100 + A.a), 'filler' from t4 A, t0 B;
explain select * from t4 where
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
drop table t1,t2,t3,t4;
drop table t0;
......@@ -4119,9 +4119,12 @@ double get_fanout_with_deps(JOIN *join, table_map tset)
for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
tab= next_top_level_tab(join, tab))
{
fanout *= (tab->records_read && !tab->emb_sj_nest) ?
rows2double(tab->records_read) : 1;
}
if ((tab->table->map & checked_deps) && !tab->emb_sj_nest &&
tab->records_read != 0)
{
fanout *= rows2double(tab->records_read);
}
}
return fanout;
}
......@@ -4208,7 +4211,7 @@ void check_out_index_stats(JOIN *join)
#endif
double get_post_group_estimate(JOIN* join)
double get_post_group_estimate(JOIN* join, double join_op_rows)
{
table_map tables_in_group_list= table_map(0);
......@@ -4217,8 +4220,10 @@ double get_post_group_estimate(JOIN* join)
{
Item *item= order->item[0];
if (item->used_tables() & RAND_TABLE_BIT)
return HA_POS_ERROR; // TODO: change to join-output-estimate
{
/* Each join output record will be in its own group */
return join_op_rows;
}
tables_in_group_list|= item->used_tables();
}
tables_in_group_list &= ~PSEUDO_TABLE_BITS;
......@@ -4291,7 +4296,7 @@ int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
if (join->group_list)
{
DBUG_PRINT("info",("Materialized join has grouping, trying to estimate"));
double output_rows= get_post_group_estimate(materialize_join);
double output_rows= get_post_group_estimate(materialize_join, *out_rows);
DBUG_PRINT("info",("Got value of %g", output_rows));
*out_rows= output_rows;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment