Commit 44570d2b authored by unknown

MWL#89

Automerged with 5.3.
parents 4058115c a02682ab
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -13,4 +13,3 @@ kill : Bug#37780 2008-12-03 HHunger need some changes to be ...@@ -13,4 +13,3 @@ kill : Bug#37780 2008-12-03 HHunger need some changes to be
query_cache_28249 : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically query_cache_28249 : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically
log_tables-big : Bug#48646 2010-11-15 mattiasj report already exists log_tables-big : Bug#48646 2010-11-15 mattiasj report already exists
read_many_rows_innodb : Bug#37635 2010-11-15 mattiasj report already exists read_many_rows_innodb : Bug#37635 2010-11-15 mattiasj report already exists
main.subselect_mat_cost : MWL#89 tests that must be adjusted to the cost model introduced after the code review
# #
# Tets of cost-based choice between the materialization and in-to-exists # Tests of cost-based choice between the materialization and in-to-exists
# subquery execution strategies (MWL#89) # subquery execution strategies (MWL#89)
# #
# The test file is divided into two groups of tests:
# A. Typical cases when either of the two strategies is selected:
# 1. Subquery in disjunctive WHERE clause of the outer query.
# 2. NOT IN subqueries
# 3. Subqueries with GROUP BY, HAVING, and aggregate functions
# 4. Subqueries in the SELECT and HAVING clauses
# 5. Subqueries with UNION
# B. Reasonably exhaustive tests of the various combinations of optimizer
# switches, data distribution, available indexes, and typical queries.
#
-- echo TEST GROUP 1:
-- echo Typical cases of in-to-exists and materialization subquery strategies
-- echo =====================================================================
--disable_warnings
drop database if exists world;
--enable_warnings
set names utf8;
create database world;
use world;
--source include/world_schema.inc
--disable_query_log
--disable_result_log
--disable_warnings --disable_warnings
drop table if exists t1, t2, t1_1024, t2_1024; --source include/world.inc
drop procedure if exists make_t1_indexes;
drop procedure if exists make_t2_indexes;
drop procedure if exists remove_t1_indexes;
drop procedure if exists remove_t2_indexes;
drop procedure if exists add_materialization_data;
drop procedure if exists delete_materialization_data;
drop procedure if exists set_all_columns_not_null;
drop procedure if exists set_all_columns_nullable;
--enable_warnings --enable_warnings
--enable_result_log
--enable_query_log
-- echo Make the schema and data more diverse by adding more indexes, nullable
-- echo columns, and NULL data.
create index SurfaceArea on Country(SurfaceArea);
create index Language on CountryLanguage(Language);
create index CityName on City(Name);
alter table City change population population int(11) null default 0;
select max(id) from City into @max_city_id;
insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL);
SELECT COUNT(*) FROM Country;
SELECT COUNT(*) FROM City;
SELECT COUNT(*) FROM CountryLanguage;
set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on';
-- echo
-- echo 1. Subquery in a disjunctive WHERE clause of the outer query.
-- echo
-- echo
-- echo Q1.1m:
-- echo MATERIALIZATION: there are too many rows in the outer query
-- echo to be looked up in the inner table.
EXPLAIN
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 1000000;
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 1000000;
-- echo Q1.1e:
-- echo IN-EXISTS: the materialization cost is the same as above, but
-- echo there are much fewer outer rows to be looked up, thus the
-- echo materialization cost is too high to compensate for fast lookups.
EXPLAIN
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 10*1000000;
SELECT Name FROM Country
WHERE (Code IN (select Country from City where City.Population > 100000) OR
Name LIKE 'L%') AND
surfacearea > 10*1000000;
-- echo
-- echo Q1.2m:
-- echo MATERIALIZATION: the IN predicate is pushed (attached) to the last table
-- echo in the join order (Country, City), therefore there are too many row
-- echo combinations to filter by re-executing the subquery for each combination.
EXPLAIN
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(City.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
City.name LIKE '%Island%');
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(City.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
City.name LIKE '%Island%');
-- echo Q1.2e:
-- echo IN_EXISTS: join order is the same, but the left IN operand refers to
-- echo only the first table in the join order (Country), so there are much
-- echo fewer rows to filter by subquery re-execution.
EXPLAIN extended
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(Country.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
Country.name LIKE '%Island%');
SELECT *
FROM Country, City
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
(Country.Name IN
(select Language from CountryLanguage where Percentage > 50) OR
Country.name LIKE '%Island%');
-- echo
-- echo Q1.3:
-- echo For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen
-- echo for each respective subquery.
EXPLAIN
SELECT City.Name, Country.Name
FROM City,Country
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
((Country.Code, Country.Name) IN
(select Country, Language from CountryLanguage where Percentage > 50) AND
Country.Population > 3000000
OR
(Country.Code, City.Name) IN
(select Country, Language from CountryLanguage));
SELECT City.Name, Country.Name
FROM City,Country
WHERE City.Country = Country.Code AND
Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
((Country.Code, Country.Name) IN
(select Country, Language from CountryLanguage where Percentage > 50) AND
Country.Population > 3000000
OR
(Country.Code, City.Name) IN
(select Country, Language from CountryLanguage));
-- echo
-- echo 2. NOT IN subqueries
-- echo
-- echo
-- echo Q2.1:
-- echo Number of cities that are not capitals in countries with small population.
-- echo MATERIALIZATION is 50 times faster because the cost of each subquery
-- echo re-execution is much higher than the cost of index lookups into the
-- echo materialized subquery.
EXPLAIN
select count(*) from City
where City.id not in (select capital from Country
where capital is not null and population < 100000);
-- echo
-- echo Q2.2e:
-- echo Countries that speak French, but do not speak English
-- echo IN-EXISTS because the outer query filters many rows, thus
-- echo there are few lookups to make.
EXPLAIN
SELECT Country.Name
FROM Country, CountryLanguage
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND CountryLanguage.Language = 'French'
AND Code = Country;
SELECT Country.Name
FROM Country, CountryLanguage
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND CountryLanguage.Language = 'French'
AND Code = Country;
-- echo Q2.2m:
-- echo Countries that speak French OR Spanish, but do not speak English
-- echo MATERIALIZATION because the outer query filters less rows than Q5-a,
-- echo so there are more lookups.
EXPLAIN
SELECT Country.Name
FROM Country, CountryLanguage
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
AND Code = Country;
SELECT Country.Name
FROM Country, CountryLanguage
WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
AND Code = Country;
-- echo
-- echo Q2.3e:
-- echo Not a very meaningful query that tests NOT IN.
-- echo IN-EXISTS because the outer query is cheap enough to reexecute many times.
EXPLAIN
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
-- echo Q2.3m:
-- echo MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING
-- echo clause prevents the use of the index on City(Name), and in practice reduces
-- echo radically the size of the temp table.
EXPLAIN
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code)
HAVING City.Name LIKE "Santa%");
select count(*)
from CountryLanguage
where (Language, Country) NOT IN
(SELECT City.Name, Country.Code
FROM City LEFT JOIN Country ON (Country = Code)
HAVING City.Name LIKE "Santa%");
create table t1 (a1 char(8), a2 char(8), a3 char(8), a4 int); -- echo
insert into t1 values ('1 - 00', '2 - 00', '3 - 00', 0); -- echo 3. Subqueries with GROUP BY, HAVING, and aggregate functions
insert into t1 values ('1 - 01', '2 - 01', '3 - 01', 1); -- echo
create table t2 (b1 char(8), b2 char(8), b3 char(8), b4 int); -- echo Q3.1:
insert into t2 values ('1 - 01', '2 - 01', '3 - 01', 1); -- echo Languages that are spoken in countries with 10 or 11 languages
insert into t2 values ('1 - 01', '2 - 01', '3 - 02', 2); -- echo MATERIALIZATION is about 100 times faster than IN-EXISTS.
insert into t2 values ('1 - 02', '2 - 02', '3 - 03', 3);
insert into t2 values ('1 - 02', '2 - 02', '3 - 04', 4); EXPLAIN
insert into t2 values ('1 - 03', '2 - 03', '3 - 05', 5); select count(*)
from CountryLanguage
create table t1_1024 (a1 blob(1024), a2 blob(1024)); where
insert into t1_1024 values (concat('1 - 00', repeat('x', 1018)), concat('2 - 00', repeat('x', 1018))); (Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
insert into t1_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018))); WHERE Code = Country GROUP BY Code)
OR
create table t2_1024 (b1 blob(1024), b2 blob(1024)); (Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
insert into t2_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018))); WHERE Code = Country GROUP BY Code)
insert into t2_1024 values (concat('1 - 02', repeat('x', 1018)), concat('2 - 02', repeat('x', 1018))); order by Country;
insert into t2_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018)));
insert into t2_1024 values (concat('1 - 04', repeat('x', 1018)), concat('2 - 04', repeat('x', 1018))); select count(*)
from CountryLanguage
delimiter |; where
create procedure make_t1_indexes() (Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
begin WHERE Code = Country GROUP BY Code)
create index it1i1 on t1 (a1); OR
create index it1i2 on t1 (a2); (Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
create index it1i3 on t1 (a1, a2); WHERE Code = Country GROUP BY Code)
create index it1_1024i1 on t1_1024 (a1(6)); order by Country;
create index it1_1024i2 on t1_1024 (a2(6));
create index it1_1024i3 on t1_1024 (a1(6), a2(6));
end| -- echo
-- echo Q3.2:
create procedure make_t2_indexes() -- echo Countries whose capital is a city name that names more than one
begin -- echo cities.
create index it2i1 on t2 (b1); -- echo MATERIALIZATION because the cost of single subquery execution is
create index it2i2 on t2 (b2); -- echo close to that of materializing the subquery.
create index it2i3 on t2 (b1, b2);
create unique index it2i4 on t2 (b1, b2, b3); EXPLAIN
create index it2_1024i1 on t2_1024 (b1(6)); select * from Country, City
create index it2_1024i2 on t2_1024 (b2(6)); where capital = id and
create index it2_1024i3 on t2_1024 (b1(6), b2(6)); (City.name in (SELECT name FROM City
end| GROUP BY name HAVING Count(*) > 2) OR
capital is null);
create procedure remove_t1_indexes()
begin select * from Country, City
drop index it1i1 on t1; where capital = id and
drop index it1i2 on t1; (City.name in (SELECT name FROM City
drop index it1i3 on t1; GROUP BY name HAVING Count(*) > 2) OR
drop index it1_1024i1 on t1_1024; capital is null);
drop index it1_1024i2 on t1_1024;
drop index it1_1024i3 on t1_1024; -- echo
end| -- echo Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS
create procedure remove_t2_indexes() EXPLAIN
begin SELECT Name
drop index it2i1 on t2; FROM Country
drop index it2i2 on t2; WHERE Country.Code NOT IN
drop index it2i3 on t2; (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
drop index it2i4 on t2;
drop index it2_1024i1 on t2_1024; SELECT Name
drop index it2_1024i2 on t2_1024; FROM Country
drop index it2_1024i3 on t2_1024; WHERE Country.Code NOT IN
end| (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
create procedure add_materialization_data()
begin -- echo
insert into t1 values ('1 - 03', '2 - 03', '3 - 03', 3); -- echo 4. Subqueries in the SELECT and HAVING clauses
insert into t1 values ('1 - 04', '2 - 04', '3 - 04', 4); -- echo
insert into t1 values ('1 - 05', '2 - 05', '3 - 05', 5);
insert into t1 values ('1 - 06', '2 - 06', '3 - 06', 6); -- echo Q4.1m:
insert into t1 values ('1 - 07', '2 - 07', '3 - 07', 7); -- echo Capital information about very big cities
insert into t1_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018))); -- echo MATERIALIZATION
end| EXPLAIN
select Name, City.id in (select capital from Country where capital is not null) as is_capital
create procedure delete_materialization_data() from City
begin where City.population > 10000000;
delete from t1 where a1 >= '1 - 03';
delete from t1_1024 where a1 >= '1 - 03'; select Name, City.id in (select capital from Country where capital is not null) as is_capital
end| from City
where City.population > 10000000;
create procedure set_all_columns_not_null()
begin -- echo Q4.1e:
alter table t1 modify a1 char(8) not null, modify a2 char(8) not null, modify a3 char(8) not null; -- echo IN-TO-EXISTS after adding an index to make the subquery re-execution
alter table t2 modify b1 char(8) not null, modify b2 char(8) not null, modify b3 char(8) not null; -- echo efficient.
end|
create index CountryCapital on Country(capital);
create procedure set_all_columns_nullable()
begin EXPLAIN
alter table t1 modify a1 char(8) null, modify a2 char(8) null, modify a3 char(8) null; select Name, City.id in (select capital from Country where capital is not null) as is_capital
alter table t2 modify b1 char(8) null, modify b2 char(8) null, modify b3 char(8) null; from City
end| where City.population > 10000000;
delimiter ;| select Name, City.id in (select capital from Country where capital is not null) as is_capital
-- echo from City
where City.population > 10000000;
-- echo /******************************************************************************
-- echo 1. Both materialization and in-to-exists are ON, make a cost-based choice. drop index CountryCapital on Country;
-- echo ******************************************************************************/
set @@optimizer_switch='materialization=on,in_to_exists=on'; -- echo
-- echo -- echo Q4.2:
-- echo /* 1.1 In-to-exists is cheaper */ -- echo MATERIALIZATION
call make_t1_indexes(); # TODO: the cost estimates for subqueries in the HAVING clause need to be changed
# to take into account that the subquery predicate is executed #times ~ to the
-- echo /* 1.1.1 non-indexed table access */ # number of groups, not number of rows
-- source include/subselect_mat_cost.inc EXPLAIN
SELECT City.Name, City.Population
-- echo /* 1.1.2 indexed table access, nullabale columns. */ FROM City JOIN Country ON City.Country = Country.Code
call make_t2_indexes(); GROUP BY City.Name
-- source include/subselect_mat_cost.inc HAVING City.Name IN (select Name from Country where population < 1000000);
-- echo /* 1.1.3 indexed table access, non-nullabale columns. */ SELECT City.Name, City.Population
call set_all_columns_not_null(); FROM City JOIN Country ON City.Country = Country.Code
-- source include/subselect_mat_cost.inc GROUP BY City.Name
call set_all_columns_nullable(); HAVING City.Name IN (select Name from Country where population < 1000000);
-- echo
-- echo /* 1.2 Materialization is cheaper */ -- echo
# make materialization cheaper -- echo 5. Subqueries with UNION
call add_materialization_data(); -- echo
call remove_t1_indexes();
-- echo Q5.1:
-- echo /* 1.2.1 non-indexed table access */ EXPLAIN
call remove_t2_indexes(); SELECT * from City where (Name, 91) in
-- source include/subselect_mat_cost.inc (SELECT Name, round(Population/1000)
FROM City
-- echo /* 1.2.2 indexed table access, nullabale columns. */ WHERE Country = "IND" AND Population > 2500000
call make_t2_indexes(); UNION
-- source include/subselect_mat_cost.inc SELECT Name, round(Population/1000)
FROM City
-- echo /* 1.2.3 indexed table access, non-nullabale columns. */ WHERE Country = "IND" AND Population < 100000);
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc SELECT * from City where (Name, 91) in
call set_all_columns_nullable(); (SELECT Name, round(Population/1000)
FROM City
WHERE Country = "IND" AND Population > 2500000
insert into t1 values ('1 - 02', '2 - 02', '3 - 02', 2); UNION
SELECT Name, round(Population/1000)
-- echo /****************************************************************************** FROM City
-- echo 2. Materialization is OFF, in-to-exists is ON, materialization is cheaper. WHERE Country = "IND" AND Population < 100000);
-- echo ******************************************************************************/
set @@optimizer_switch='materialization=off,in_to_exists=on'; set @@optimizer_switch='default';
drop database world;
-- echo /* 2.1 non-indexed table access */ -- echo
call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo
-- echo /* 2.2 indexed table access, nullabale columns. */ -- echo TEST GROUP 2:
call make_t2_indexes(); -- echo Tests of various combinations of optimizer switches, types of queries,
-- source include/subselect_mat_cost.inc -- echo available indexes, column nullability, constness of tables/predicates.
-- echo =====================================================================
-- echo /* 2.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc #TODO From Igor's review:
call set_all_columns_nullable(); #
#2.1 Please add a case when two subqueries are used in the where clause
#(or in select) of a 2-way join.
-- echo /****************************************************************************** #The first subquery is accessed after the first table, while the second
-- echo 3. Materialization is ON, in-to-exists is OFF, in-to-exists is cheaper. #is accessed after the second table.
-- echo ******************************************************************************/ #
set @@optimizer_switch='materialization=on,in_to_exists=off'; #2.2. Please add a test case when one non-correlated subquery contains
# make IN-TO-EXISTS cheaper #another non-correlated subquery.
call delete_materialization_data(); #Consider 4 subcases:
call make_t1_indexes(); # both subqueries are materialized
# IN_EXIST transformations are applied to both subqueries
-- echo /* 3.1 non-indexed table access */ # outer subquery is materialized while the inner subquery is not
call remove_t2_indexes(); #(IN_EXIST transformation is applied to it)
-- source include/subselect_mat_cost.inc # inner subquery is materialized while the outer subquery is not (
#IN_EXIST transformation is applied to it)
-- echo /* 3.2 indexed table access, nullabale columns. */
call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-- echo /* 3.3 indexed table access, non-nullabale columns. */
call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
call set_all_columns_nullable();
drop procedure make_t1_indexes;
drop procedure make_t2_indexes;
drop procedure remove_t1_indexes;
drop procedure remove_t2_indexes;
drop procedure add_materialization_data;
drop procedure delete_materialization_data;
drop procedure set_all_columns_not_null;
drop procedure set_all_columns_nullable;
drop table t1, t2, t1_1024, t2_1024;
...@@ -38,11 +38,14 @@ Item_subselect::Item_subselect(): ...@@ -38,11 +38,14 @@ Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0),
used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1), used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
inside_first_fix_fields(0), done_first_fix_fields(FALSE), inside_first_fix_fields(0), done_first_fix_fields(FALSE),
substitution(0), expr_cache(0), engine(0), forced_const(FALSE), eliminated(FALSE), expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
engine_changed(0), changed(0), is_correlated(FALSE) engine_changed(0), changed(0), is_correlated(FALSE)
{ {
DBUG_ENTER("Item_subselect::Item_subselect"); DBUG_ENTER("Item_subselect::Item_subselect");
DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this)); DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
#ifndef DBUG_OFF
exec_counter= 0;
#endif
with_subselect= 1; with_subselect= 1;
reset(); reset();
/* /*
...@@ -130,6 +133,10 @@ void Item_subselect::cleanup() ...@@ -130,6 +133,10 @@ void Item_subselect::cleanup()
value_assigned= 0; value_assigned= 0;
expr_cache= 0; expr_cache= 0;
forced_const= FALSE; forced_const= FALSE;
DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
#ifndef DBUG_OFF
exec_counter= 0;
#endif
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -548,7 +555,9 @@ bool Item_subselect::exec() ...@@ -548,7 +555,9 @@ bool Item_subselect::exec()
DBUG_EXECUTE_IF("subselect_exec_fail", return 1;); DBUG_EXECUTE_IF("subselect_exec_fail", return 1;);
res= engine->exec(); res= engine->exec();
#ifndef DBUG_OFF
++exec_counter;
#endif
if (engine_changed) if (engine_changed)
{ {
engine_changed= 0; engine_changed= 0;
......
...@@ -52,6 +52,17 @@ protected: ...@@ -52,6 +52,17 @@ protected:
bool inside_first_fix_fields; bool inside_first_fix_fields;
bool done_first_fix_fields; bool done_first_fix_fields;
Item *expr_cache;
/*
Set to TRUE if at optimization or execution time we determine that this
item's value is a constant. We need this member because it is not possible
to substitute 'this' with a constant item.
*/
bool forced_const;
#ifndef DBUG_OFF
/* Count the number of times this subquery predicate has been executed. */
uint exec_counter;
#endif
public: public:
/* /*
Used inside Item_subselect::fix_fields() according to this scenario: Used inside Item_subselect::fix_fields() according to this scenario:
...@@ -66,19 +77,13 @@ public: ...@@ -66,19 +77,13 @@ public:
substitution= NULL; substitution= NULL;
< Item_subselect::fix_fields < Item_subselect::fix_fields
*/ */
/* TODO make this protected member again. */
Item *substitution; Item *substitution;
/* unit of subquery */
st_select_lex_unit *unit;
Item *expr_cache;
/* engine that perform execution of subselect (single select or union) */ /* engine that perform execution of subselect (single select or union) */
/* TODO make this protected member again. */
subselect_engine *engine; subselect_engine *engine;
/* /* unit of subquery */
Set to TRUE if at optimization or execution time we determine that this st_select_lex_unit *unit;
item's value is a constant. We need this member because it is not possible
to substitute 'this' with a constant item.
*/
bool forced_const;
/* A reference from inside subquery predicate to somewhere outside of it */ /* A reference from inside subquery predicate to somewhere outside of it */
class Ref_to_outside : public Sql_alloc class Ref_to_outside : public Sql_alloc
{ {
......
...@@ -4341,8 +4341,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4341,8 +4341,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
{ {
JOIN *outer_join; JOIN *outer_join;
JOIN *inner_join= this; JOIN *inner_join= this;
/* Number of (partial) rows of the outer JOIN filtered by the IN predicate. */
double outer_record_count;
/* Number of unique value combinations filtered by the IN predicate. */ /* Number of unique value combinations filtered by the IN predicate. */
double outer_lookup_keys; double outer_lookup_keys;
/* Cost and row count of the unmodified subquery. */ /* Cost and row count of the unmodified subquery. */
...@@ -4362,38 +4360,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4362,38 +4360,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
outer_join= unit->outer_select() ? unit->outer_select()->join : NULL; outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
if (outer_join) if (outer_join)
{ {
uint outer_partial_plan_len; /*
The index of the last JOIN_TAB in the outer JOIN where in_subs is
attached (pushed to).
*/
uint max_outer_join_tab_idx;
/* /*
Make_cond_for_table is called for predicates only in the WHERE/ON Make_cond_for_table is called for predicates only in the WHERE/ON
clauses. In all other cases, predicates are not pushed to any clauses. In all other cases, predicates are not pushed to any
JOIN_TAB, and their joi_tab_idx remains MAX_TABLES. Such predicates JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates
are evaluated for each complete row of the outer join. are evaluated for each complete row of the outer join.
*/ */
outer_partial_plan_len= (in_subs->get_join_tab_idx() == MAX_TABLES) ? DBUG_ASSERT(outer_join->table_count > 0);
outer_join->table_count : max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
in_subs->get_join_tab_idx() + 1; outer_join->table_count - 1:
outer_join->get_partial_cost_and_fanout(outer_partial_plan_len, in_subs->get_join_tab_idx();
/*
TODO:
Currently outer_lookup_keys is computed as the number of rows in
the partial join including the JOIN_TAB where the IN predicate is
pushed to. In the general case this is a gross overestimate because
due to caching we are interested only in the number of unique keys.
The search key may be formed by columns from much fewer than all
tables in the partial join. Example:
select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
If the join order: t1, t2, the number of unique lookup keys is ~ to
the number of unique values t2.c2 in the partial join t1 join t2.
*/
outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx,
table_map(-1), table_map(-1),
&dummy, &dummy,
&outer_record_count); &outer_lookup_keys);
if (outer_join->table_count > outer_join->const_tables)
{
outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
in_subs->used_tables(),
&dummy,
&outer_lookup_keys);
/*
outer_lookup_keys= prev_record_reads(outer_join->best_positions,
outer_partial_plan_len,
in_subs->used_tables());
*/
}
else
{
/* If all tables are constant, positions is undefined. */
outer_lookup_keys= 1;
}
} }
else else
{ {
...@@ -4401,17 +4398,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4401,17 +4398,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
TODO: outer_join can be NULL for DELETE statements. TODO: outer_join can be NULL for DELETE statements.
How to compute its cost? How to compute its cost?
*/ */
outer_record_count= 1; outer_lookup_keys= 1;
outer_lookup_keys=1;
} }
/*
There cannot be more lookup keys than the total number of records.
TODO: this is a temporary solution until we find a better way to compute
get_partial_join_cost() and prev_record_reads() in a consistent manner,
where it is guaranteed that (outer_lookup_keys <= outer_record_count).
*/
if (outer_lookup_keys > outer_record_count)
outer_lookup_keys= outer_record_count;
/* /*
B. Estimate the cost and number of records of the subquery both B. Estimate the cost and number of records of the subquery both
...@@ -4459,7 +4447,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4459,7 +4447,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
write_cost * inner_record_count_1; write_cost * inner_record_count_1;
materialize_strategy_cost= materialization_cost + materialize_strategy_cost= materialization_cost +
outer_record_count * lookup_cost; outer_lookup_keys * lookup_cost;
/* C.2 Compute the cost of the IN=>EXISTS strategy. */ /* C.2 Compute the cost of the IN=>EXISTS strategy. */
in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2; in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
...@@ -4469,6 +4457,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4469,6 +4457,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
in_subs->in_strategy&= ~SUBS_MATERIALIZATION; in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
else else
in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS; in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
DBUG_PRINT("info",
("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
DBUG_PRINT("info",
("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
in_exists_strategy_cost, inner_read_time_2));
DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
} }
/* /*
...@@ -4524,9 +4520,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables) ...@@ -4524,9 +4520,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
const_tables != table_count) const_tables != table_count)
{ {
/* /*
The subquery was not reoptimized either because the user allowed only the The subquery was not reoptimized either because the user allowed only
IN-EXISTS strategy, or because materialization was not possible based on the IN-EXISTS strategy, or because materialization was not possible
semantic analysis. Clenup the original plan and reoptimize. based on semantic analysis. Cleanup the original plan and reoptimize.
*/ */
for (uint i= 0; i < table_count; i++) for (uint i= 0; i < table_count; i++)
{ {
......
...@@ -6045,7 +6045,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx, ...@@ -6045,7 +6045,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx,
} }
for (tab= first_depth_first_tab(this), i= const_tables; for (tab= first_depth_first_tab(this), i= const_tables;
tab; (i <= end_tab_idx && tab);
tab= next_depth_first_tab(this, tab), i++) tab= next_depth_first_tab(this, tab), i++)
{ {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment