Commit a02682ab authored by unknown's avatar unknown

MWL#89

- Added regression test with queries over the WORLD database.
- Discovered and fixed several bugs in the related cost calculation
  functionality both in the semijoin and non-semijon subquery code.
- Added DBUG printing of the cost variables used to decide between
  IN-EXISTS and MATERIALIZATION.
parent 0cf912c2
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -13,4 +13,3 @@ kill : Bug#37780 2008-12-03 HHunger need some changes to be ...@@ -13,4 +13,3 @@ kill : Bug#37780 2008-12-03 HHunger need some changes to be
query_cache_28249 : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically query_cache_28249 : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically
log_tables-big : Bug#48646 2010-11-15 mattiasj report already exists log_tables-big : Bug#48646 2010-11-15 mattiasj report already exists
read_many_rows_innodb : Bug#37635 2010-11-15 mattiasj report already exists read_many_rows_innodb : Bug#37635 2010-11-15 mattiasj report already exists
main.subselect_mat_cost : MWL#89 tests that must be adjusted to the cost model introduced after the code review
This diff is collapsed.
...@@ -38,11 +38,14 @@ Item_subselect::Item_subselect(): ...@@ -38,11 +38,14 @@ Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0),
used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1), used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
inside_first_fix_fields(0), done_first_fix_fields(FALSE), inside_first_fix_fields(0), done_first_fix_fields(FALSE),
substitution(0), expr_cache(0), engine(0), forced_const(FALSE), eliminated(FALSE), expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
engine_changed(0), changed(0), is_correlated(FALSE) engine_changed(0), changed(0), is_correlated(FALSE)
{ {
DBUG_ENTER("Item_subselect::Item_subselect"); DBUG_ENTER("Item_subselect::Item_subselect");
DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this)); DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
#ifndef DBUG_OFF
exec_counter= 0;
#endif
with_subselect= 1; with_subselect= 1;
reset(); reset();
/* /*
...@@ -130,6 +133,10 @@ void Item_subselect::cleanup() ...@@ -130,6 +133,10 @@ void Item_subselect::cleanup()
value_assigned= 0; value_assigned= 0;
expr_cache= 0; expr_cache= 0;
forced_const= FALSE; forced_const= FALSE;
DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
#ifndef DBUG_OFF
exec_counter= 0;
#endif
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -548,7 +555,9 @@ bool Item_subselect::exec() ...@@ -548,7 +555,9 @@ bool Item_subselect::exec()
DBUG_EXECUTE_IF("subselect_exec_fail", return 1;); DBUG_EXECUTE_IF("subselect_exec_fail", return 1;);
res= engine->exec(); res= engine->exec();
#ifndef DBUG_OFF
++exec_counter;
#endif
if (engine_changed) if (engine_changed)
{ {
engine_changed= 0; engine_changed= 0;
......
...@@ -52,6 +52,17 @@ protected: ...@@ -52,6 +52,17 @@ protected:
bool inside_first_fix_fields; bool inside_first_fix_fields;
bool done_first_fix_fields; bool done_first_fix_fields;
Item *expr_cache;
/*
Set to TRUE if at optimization or execution time we determine that this
item's value is a constant. We need this member because it is not possible
to substitute 'this' with a constant item.
*/
bool forced_const;
#ifndef DBUG_OFF
/* Count the number of times this subquery predicate has been executed. */
uint exec_counter;
#endif
public: public:
/* /*
Used inside Item_subselect::fix_fields() according to this scenario: Used inside Item_subselect::fix_fields() according to this scenario:
...@@ -66,19 +77,13 @@ public: ...@@ -66,19 +77,13 @@ public:
substitution= NULL; substitution= NULL;
< Item_subselect::fix_fields < Item_subselect::fix_fields
*/ */
/* TODO make this protected member again. */
Item *substitution; Item *substitution;
/* unit of subquery */
st_select_lex_unit *unit;
Item *expr_cache;
/* engine that perform execution of subselect (single select or union) */ /* engine that perform execution of subselect (single select or union) */
/* TODO make this protected member again. */
subselect_engine *engine; subselect_engine *engine;
/* /* unit of subquery */
Set to TRUE if at optimization or execution time we determine that this st_select_lex_unit *unit;
item's value is a constant. We need this member because it is not possible
to substitute 'this' with a constant item.
*/
bool forced_const;
/* A reference from inside subquery predicate to somewhere outside of it */ /* A reference from inside subquery predicate to somewhere outside of it */
class Ref_to_outside : public Sql_alloc class Ref_to_outside : public Sql_alloc
{ {
......
...@@ -4324,8 +4324,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4324,8 +4324,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
JOIN *outer_join; JOIN *outer_join;
JOIN *inner_join= this; JOIN *inner_join= this;
/* Number of (partial) rows of the outer JOIN filtered by the IN predicate. */
double outer_record_count;
/* Number of unique value combinations filtered by the IN predicate. */ /* Number of unique value combinations filtered by the IN predicate. */
double outer_lookup_keys; double outer_lookup_keys;
/* Cost and row count of the unmodified subquery. */ /* Cost and row count of the unmodified subquery. */
...@@ -4345,38 +4343,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4345,38 +4343,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
if (outer_join) if (outer_join)
{ {
uint outer_partial_plan_len; /*
The index of the last JOIN_TAB in the outer JOIN where in_subs is
attached (pushed to).
*/
uint max_outer_join_tab_idx;
/* /*
Make_cond_for_table is called for predicates only in the WHERE/ON Make_cond_for_table is called for predicates only in the WHERE/ON
clauses. In all other cases, predicates are not pushed to any clauses. In all other cases, predicates are not pushed to any
JOIN_TAB, and their joi_tab_idx remains MAX_TABLES. Such predicates JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates
are evaluated for each complete row of the outer join. are evaluated for each complete row of the outer join.
*/ */
outer_partial_plan_len= (in_subs->get_join_tab_idx() == MAX_TABLES) ? DBUG_ASSERT(outer_join->table_count > 0);
outer_join->table_count : max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
in_subs->get_join_tab_idx() + 1; outer_join->table_count - 1:
outer_join->get_partial_cost_and_fanout(outer_partial_plan_len, in_subs->get_join_tab_idx();
/*
TODO:
Currently outer_lookup_keys is computed as the number of rows in
the partial join including the JOIN_TAB where the IN predicate is
pushed to. In the general case this is a gross overestimate because
due to caching we are interested only in the number of unique keys.
The search key may be formed by columns from much fewer than all
tables in the partial join. Example:
select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
If the join order: t1, t2, the number of unique lookup keys is ~ to
the number of unique values t2.c2 in the partial join t1 join t2.
*/
outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx,
table_map(-1), table_map(-1),
&dummy, &dummy,
&outer_record_count); &outer_lookup_keys);
if (outer_join->table_count > outer_join->const_tables)
{
outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
in_subs->used_tables(),
&dummy,
&outer_lookup_keys);
/*
outer_lookup_keys= prev_record_reads(outer_join->best_positions,
outer_partial_plan_len,
in_subs->used_tables());
*/
}
else
{
/* If all tables are constant, positions is undefined. */
outer_lookup_keys= 1;
}
} }
else else
{ {
...@@ -4384,17 +4381,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4384,17 +4381,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
TODO: outer_join can be NULL for DELETE statements. TODO: outer_join can be NULL for DELETE statements.
How to compute its cost? How to compute its cost?
*/ */
outer_record_count= 1; outer_lookup_keys= 1;
outer_lookup_keys=1;
} }
/*
There cannot be more lookup keys than the total number of records.
TODO: this a temporary solution until we find a better way to compute
get_partial_join_cost() and prev_record_reads() in a consitent manner,
where it is guaranteed that (outer_lookup_keys <= outer_record_count).
*/
if (outer_lookup_keys > outer_record_count)
outer_lookup_keys= outer_record_count;
/* /*
B. Estimate the cost and number of records of the subquery both B. Estimate the cost and number of records of the subquery both
...@@ -4442,7 +4430,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4442,7 +4430,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
write_cost * inner_record_count_1; write_cost * inner_record_count_1;
materialize_strategy_cost= materialization_cost + materialize_strategy_cost= materialization_cost +
outer_record_count * lookup_cost; outer_lookup_keys * lookup_cost;
/* C.2 Compute the cost of the IN=>EXISTS strategy. */ /* C.2 Compute the cost of the IN=>EXISTS strategy. */
in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2; in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
...@@ -4452,6 +4440,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4452,6 +4440,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION; in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS; in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
DBUG_PRINT("info",
("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
DBUG_PRINT("info",
("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
in_exists_strategy_cost, inner_read_time_2));
DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
} }
/* /*
...@@ -4507,9 +4503,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4507,9 +4503,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
const_tables != table_count) const_tables != table_count)
{ {
/* /*
The subquery was not reoptimized either because the user allowed only the The subquery was not reoptimized either because the user allowed only
IN-EXISTS strategy, or because materialization was not possible based on the IN-EXISTS strategy, or because materialization was not possible
semantic analysis. Clenup the original plan and reoptimize. based on semantic analysis. Cleanup the original plan and reoptimize.
*/ */
for (uint i= 0; i < table_count; i++) for (uint i= 0; i < table_count; i++)
{ {
......
...@@ -5969,7 +5969,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx, ...@@ -5969,7 +5969,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx,
} }
for (tab= first_depth_first_tab(this), i= const_tables; for (tab= first_depth_first_tab(this), i= const_tables;
tab; (i <= end_tab_idx && tab);
tab= next_depth_first_tab(this, tab), i++) tab= next_depth_first_tab(this, tab), i++)
{ {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment