MWL#89

- Added regression test with queries over the WORLD database. - Discovered and fixed several bugs in the related cost calculation functionality both in the semijoin and non-semijoin subquery code. - Added DBUG printing of the cost variables used to decide between IN-EXISTS and MATERIALIZATION.

MWL#89
- Added regression test with queries over the WORLD database. - Discovered and fixed several bugs in the related cost calculation functionality both in the semijoin and non-semijoin subquery code. - Added DBUG printing of the cost variables used to decide between IN-EXISTS and MATERIALIZATION.
a02682ab · unknown · 0cf912c2 · a02682ab · a02682ab · a02682ab
Commit a02682ab authored Jun 21, 2011 by unknown
7 changed files
--- a/mysql-test/r/subselect_mat_cost.result
+++ b/mysql-test/r/subselect_mat_cost.result
--- a/mysql-test/t/disabled.def
+++ b/mysql-test/t/disabled.def
@@ -13,4 +13,3 @@ kill                     : Bug#37780 2008-12-03 HHunger need some changes to be
 query_cache_28249        : Bug#43861 2009-03-25 main.query_cache_28249 fails sporadically
 log_tables-big           : Bug#48646 2010-11-15 mattiasj report already exists
 read_many_rows_innodb    : Bug#37635 2010-11-15 mattiasj report already exists
-main.subselect_mat_cost  : MWL#89 tests that must be adjusted to the cost model introduced after the code review
--- a/mysql-test/t/subselect_mat_cost.test
+++ b/mysql-test/t/subselect_mat_cost.test
-#
-# Tets of cost-based choice between the materialization and in-to-exists
+	#
+# Tests of cost-based choice between the materialization and in-to-exists
 # subquery execution strategies (MWL#89)
 #
+# The test file is divided into two groups of tests:
+# A. Typical cases when either of the two strategies is selected:
+#    1. Subquery in disjunctive WHERE clause of the outer query.
+#    2. NOT IN subqueries
+#    3. Subqueries with GROUP BY, HAVING, and aggregate functions
+#    4. Subqueries in the SELECT and HAVING clauses
+#    5. Subqueries with UNION
+# B. Reasonably exhaustive tests of the various combinations of optimizer
+#    switches, data distribution, available indexes, and typical queries.
+#
+
+
+-- echo TEST GROUP 1:
+-- echo Typical cases of in-to-exists and materialization subquery strategies
+-- echo =====================================================================
+
+--disable_warnings
+drop database if exists world;
+--enable_warnings
+
+set names utf8;
+
+create database world;
+use world;

+--source include/world_schema.inc
+--disable_query_log
+--disable_result_log
 --disable_warnings
-drop table if exists t1, t2, t1_1024, t2_1024;
-drop procedure if exists make_t1_indexes;
-drop procedure if exists make_t2_indexes;
-drop procedure if exists remove_t1_indexes;
-drop procedure if exists remove_t2_indexes;
-drop procedure if exists add_materialization_data;
-drop procedure if exists delete_materialization_data;
-drop procedure if exists set_all_columns_not_null;
-drop procedure if exists set_all_columns_nullable;
+--source include/world.inc
 --enable_warnings
+--enable_result_log
+--enable_query_log
+
+-- echo Make the schema and data more diverse by adding more indexes, nullable
+-- echo columns, and NULL data.
+create index SurfaceArea on Country(SurfaceArea);
+create index Language on CountryLanguage(Language);
+create index CityName on City(Name);
+alter table City change population population int(11) null default 0;
+
+select max(id) from City into @max_city_id;
+insert into City values (@max_city_id + 1,'Kilifarevo','BGR',NULL);
+
+
+SELECT COUNT(*) FROM Country;
+SELECT COUNT(*) FROM City;
+SELECT COUNT(*) FROM CountryLanguage;
+
+set @@optimizer_switch = 'in_to_exists=on,semijoin=on,materialization=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on';
+
+-- echo
+-- echo 1. Subquery in a disjunctive WHERE clause of the outer query.
+-- echo
+
+-- echo
+-- echo Q1.1m:
+-- echo MATERIALIZATION: there are too many rows in the outer query
+-- echo to be looked up in the inner table.
+EXPLAIN
+SELECT Name FROM Country
+WHERE (Code IN (select Country from City where City.Population > 100000) OR
+       Name LIKE 'L%') AND
+      surfacearea > 1000000;
+
+SELECT Name FROM Country
+WHERE (Code IN (select Country from City where City.Population > 100000) OR
+       Name LIKE 'L%') AND
+      surfacearea > 1000000;
+
+-- echo Q1.1e:
+-- echo IN-EXISTS: the materialization cost is the same as above, but
+-- echo there are much fewer outer rows to be looked up, thus the
+-- echo materialization cost is too high to compensate for fast lookups.
+EXPLAIN
+SELECT Name FROM Country
+WHERE (Code IN (select Country from City where City.Population > 100000) OR
+       Name LIKE 'L%') AND
+      surfacearea > 10*1000000;
+
+SELECT Name FROM Country
+WHERE (Code IN (select Country from City where City.Population > 100000) OR
+       Name LIKE 'L%') AND
+      surfacearea > 10*1000000;
+
+-- echo
+-- echo Q1.2m:
+-- echo MATERIALIZATION: the IN predicate is pushed (attached) to the last table
+-- echo in the join order (Country, City), therefore there are too many row
+-- echo combinations to filter by re-executing the subquery for each combination.
+EXPLAIN
+SELECT *
+  FROM Country, City
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
+        (City.Name IN
+         (select Language from CountryLanguage where Percentage > 50) OR
+         City.name LIKE '%Island%');
+
+SELECT *
+  FROM Country, City
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
+        (City.Name IN
+         (select Language from CountryLanguage where Percentage > 50) OR
+         City.name LIKE '%Island%');
+
+-- echo Q1.2e:
+-- echo IN_EXISTS: join order is the same, but the left IN operand refers to
+-- echo only the first table in the join order (Country), so there are much
+-- echo fewer rows to filter by subquery re-execution.
+EXPLAIN extended
+SELECT *
+  FROM Country, City
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
+        (Country.Name IN
+         (select Language from CountryLanguage where Percentage > 50) OR
+         Country.name LIKE '%Island%');
+
+SELECT *
+  FROM Country, City
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 3000 AND Country.SurfaceArea > 10 AND
+        (Country.Name IN
+         (select Language from CountryLanguage where Percentage > 50) OR
+         Country.name LIKE '%Island%');
+
+
+-- echo
+-- echo Q1.3:
+-- echo For the same reasons as in Q2 IN-EXISTS and MATERIALIZATION chosen
+-- echo for each respective subquery.
+EXPLAIN
+SELECT City.Name, Country.Name
+  FROM City,Country
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
+        ((Country.Code, Country.Name) IN
+         (select Country, Language from CountryLanguage where Percentage > 50) AND
+         Country.Population > 3000000
+         OR
+         (Country.Code, City.Name) IN
+         (select Country, Language from CountryLanguage));
+
+SELECT City.Name, Country.Name
+  FROM City,Country
+  WHERE City.Country = Country.Code AND
+        Country.SurfaceArea < 30000 AND Country.SurfaceArea > 10 AND
+        ((Country.Code, Country.Name) IN
+         (select Country, Language from CountryLanguage where Percentage > 50) AND
+         Country.Population > 3000000
+         OR
+         (Country.Code, City.Name) IN
+         (select Country, Language from CountryLanguage));
+
+
+-- echo
+-- echo 2. NOT IN subqueries
+-- echo
+
+-- echo
+-- echo Q2.1:
+-- echo Number of cities that are not capitals in countries with small population.
+-- echo MATERIALIZATION is 50 times faster because the cost of each subquery
+-- echo re-execution is much higher than the cost of index lookups into the
+-- echo materialized subquery.
+
+EXPLAIN
+select count(*) from City
+where City.id not in (select capital from Country
+                      where capital is not null and population < 100000);
+
+-- echo
+-- echo Q2.2e:
+-- echo Countries that speak French, but do not speak English
+-- echo IN-EXISTS because the outer query filters many rows, thus
+-- echo there are few lookups to make.
+EXPLAIN
+SELECT Country.Name
+FROM Country, CountryLanguage 
+WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
+  AND CountryLanguage.Language = 'French'
+  AND Code = Country;
+
+SELECT Country.Name
+FROM Country, CountryLanguage 
+WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
+  AND CountryLanguage.Language = 'French'
+  AND Code = Country;
+
+-- echo Q2.2m:
+-- echo Countries that speak French OR Spanish, but do not speak English
+-- echo MATERIALIZATION because the outer query filters less rows than Q5-a,
+-- echo so there are more lookups.
+EXPLAIN
+SELECT Country.Name
+FROM Country, CountryLanguage 
+WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
+  AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
+  AND Code = Country;
+
+SELECT Country.Name
+FROM Country, CountryLanguage 
+WHERE Code NOT IN (SELECT Country FROM CountryLanguage WHERE Language = 'English')
+  AND (CountryLanguage.Language = 'French' OR CountryLanguage.Language = 'Spanish')
+  AND Code = Country;
+
+-- echo
+-- echo Q2.3e:
+-- echo Not a very meaningful query that tests NOT IN.
+-- echo IN-EXISTS because the outer query is cheap enough to reexecute many times.
+EXPLAIN
+select count(*)
+from CountryLanguage
+where (Language, Country) NOT IN
+      (SELECT City.Name, Country.Code
+       FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
+
+select count(*)
+from CountryLanguage
+where (Language, Country) NOT IN
+      (SELECT City.Name, Country.Code
+       FROM City LEFT JOIN Country ON (Country = Code and City.Population < 10000));
+
+-- echo Q2.3m:
+-- echo MATERIALIZATION with the PARTIAL_MATCH_MERGE strategy, because the HAVING
+-- echo clause prevents the use of the index on City(Name), and in practice reduces
+-- echo radically the size of the temp table.
+EXPLAIN
+select count(*)
+from CountryLanguage
+where (Language, Country) NOT IN
+      (SELECT City.Name, Country.Code
+       FROM City LEFT JOIN Country ON (Country = Code)
+       HAVING City.Name LIKE "Santa%");
+
+select count(*)
+from CountryLanguage
+where (Language, Country) NOT IN
+      (SELECT City.Name, Country.Code
+       FROM City LEFT JOIN Country ON (Country = Code)
+       HAVING City.Name LIKE "Santa%");
+

-create table t1 (a1 char(8), a2 char(8), a3 char(8), a4 int);
-insert into t1 values ('1 - 00', '2 - 00', '3 - 00', 0);
-insert into t1 values ('1 - 01', '2 - 01', '3 - 01', 1);
-
-create table t2 (b1 char(8), b2 char(8), b3 char(8), b4 int);
-insert into t2 values ('1 - 01', '2 - 01', '3 - 01', 1);
-insert into t2 values ('1 - 01', '2 - 01', '3 - 02', 2);
-insert into t2 values ('1 - 02', '2 - 02', '3 - 03', 3);
-insert into t2 values ('1 - 02', '2 - 02', '3 - 04', 4);
-insert into t2 values ('1 - 03', '2 - 03', '3 - 05', 5);
-
-create table t1_1024 (a1 blob(1024), a2 blob(1024));
-insert into t1_1024 values (concat('1 - 00', repeat('x', 1018)), concat('2 - 00', repeat('x', 1018)));
-insert into t1_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018)));
-
-create table t2_1024 (b1 blob(1024), b2 blob(1024));
-insert into t2_1024 values (concat('1 - 01', repeat('x', 1018)), concat('2 - 01', repeat('x', 1018)));
-insert into t2_1024 values (concat('1 - 02', repeat('x', 1018)), concat('2 - 02', repeat('x', 1018)));
-insert into t2_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018)));
-insert into t2_1024 values (concat('1 - 04', repeat('x', 1018)), concat('2 - 04', repeat('x', 1018)));
-
-delimiter |;
-create procedure make_t1_indexes()
-begin
-  create index it1i1 on t1 (a1);
-  create index it1i2 on t1 (a2);
-  create index it1i3 on t1 (a1, a2);
-  create index it1_1024i1 on t1_1024 (a1(6));
-  create index it1_1024i2 on t1_1024 (a2(6));
-  create index it1_1024i3 on t1_1024 (a1(6), a2(6));
-end|
-
-create procedure make_t2_indexes()
-begin
-  create index it2i1 on t2 (b1);
-  create index it2i2 on t2 (b2);
-  create index it2i3 on t2 (b1, b2);
-  create unique index it2i4 on t2 (b1, b2, b3);
-  create index it2_1024i1 on t2_1024 (b1(6));
-  create index it2_1024i2 on t2_1024 (b2(6));
-  create index it2_1024i3 on t2_1024 (b1(6), b2(6));
-end|
-
-create procedure remove_t1_indexes()
-begin
-  drop index it1i1 on t1;
-  drop index it1i2 on t1;
-  drop index it1i3 on t1;
-  drop index it1_1024i1 on t1_1024;
-  drop index it1_1024i2 on t1_1024;
-  drop index it1_1024i3 on t1_1024;
-end|
-
-create procedure remove_t2_indexes()
-begin
-  drop index it2i1 on t2;
-  drop index it2i2 on t2;
-  drop index it2i3 on t2;
-  drop index it2i4 on t2;
-  drop index it2_1024i1 on t2_1024;
-  drop index it2_1024i2 on t2_1024;
-  drop index it2_1024i3 on t2_1024;
-end|
-
-create procedure add_materialization_data()
-begin
-insert into t1 values ('1 - 03', '2 - 03', '3 - 03', 3);
-insert into t1 values ('1 - 04', '2 - 04', '3 - 04', 4);
-insert into t1 values ('1 - 05', '2 - 05', '3 - 05', 5);
-insert into t1 values ('1 - 06', '2 - 06', '3 - 06', 6);
-insert into t1 values ('1 - 07', '2 - 07', '3 - 07', 7);
-insert into t1_1024 values (concat('1 - 03', repeat('x', 1018)), concat('2 - 03', repeat('x', 1018)));
-end|
-
-create procedure delete_materialization_data()
-begin
-delete from t1 where a1 >= '1 - 03';
-delete from t1_1024 where a1 >= '1 - 03';
-end|
-
-create procedure set_all_columns_not_null()
-begin
-alter table t1 modify a1 char(8) not null, modify a2 char(8) not null, modify a3 char(8) not null;
-alter table t2 modify b1 char(8) not null, modify b2 char(8) not null, modify b3 char(8) not null;
-end|
-
-create procedure set_all_columns_nullable()
-begin
-alter table t1 modify a1 char(8) null, modify a2 char(8) null, modify a3 char(8) null;
-alter table t2 modify b1 char(8) null, modify b2 char(8) null, modify b3 char(8) null;
-end|
-
-delimiter ;|
-- echo
-
-- echo /******************************************************************************
-- echo 1. Both materialization and in-to-exists are ON, make a cost-based choice.
-- echo ******************************************************************************/
-set @@optimizer_switch='materialization=on,in_to_exists=on';
-- echo 
-- echo /* 1.1 In-to-exists is cheaper */
-call make_t1_indexes();
-
-- echo /* 1.1.1 non-indexed table access */
-- source include/subselect_mat_cost.inc
-
-- echo /* 1.1.2 indexed table access, nullabale columns. */
-call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 1.1.3 indexed table access, non-nullabale columns. */
-call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
-call set_all_columns_nullable();
-
-- echo 
-- echo /* 1.2 Materialization is cheaper */
-# make materialization cheaper
-call add_materialization_data();
-call remove_t1_indexes();
-
-- echo /* 1.2.1 non-indexed table access */
-call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 1.2.2 indexed table access, nullabale columns. */
-call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 1.2.3 indexed table access, non-nullabale columns. */
-call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
-call set_all_columns_nullable();
-
-
-insert into t1 values ('1 - 02', '2 - 02', '3 - 02', 2);
-
-- echo /******************************************************************************
-- echo 2. Materialization is OFF, in-to-exists is ON, materialization is cheaper.
-- echo ******************************************************************************/
-set @@optimizer_switch='materialization=off,in_to_exists=on';
-
-- echo /* 2.1 non-indexed table access */
-call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 2.2 indexed table access, nullabale columns. */
-call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 2.3 indexed table access, non-nullabale columns. */
-call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
-call set_all_columns_nullable();
-
-
-- echo /******************************************************************************
-- echo 3. Materialization is ON, in-to-exists is OFF, in-to-exists is cheaper.
-- echo ******************************************************************************/
-set @@optimizer_switch='materialization=on,in_to_exists=off';
-# make IN-TO-EXISTS cheaper
-call delete_materialization_data();
-call make_t1_indexes();
-
-- echo /* 3.1 non-indexed table access */
-call remove_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 3.2 indexed table access, nullabale columns. */
-call make_t2_indexes();
-- source include/subselect_mat_cost.inc
-
-- echo /* 3.3 indexed table access, non-nullabale columns. */
-call set_all_columns_not_null();
-- source include/subselect_mat_cost.inc
-call set_all_columns_nullable();
-
-
-drop procedure make_t1_indexes;
-drop procedure make_t2_indexes;
-drop procedure remove_t1_indexes;
-drop procedure remove_t2_indexes;
-drop procedure add_materialization_data;
-drop procedure delete_materialization_data;
-drop procedure set_all_columns_not_null;
-drop procedure set_all_columns_nullable;
-drop table t1, t2, t1_1024, t2_1024;
+-- echo
+-- echo 3. Subqueries with GROUP BY, HAVING, and aggregate functions
+-- echo
+
+-- echo Q3.1:
+-- echo Languages that are spoken in countries with 10 or 11 languages
+-- echo MATERIALIZATION is about 100 times faster than IN-EXISTS.
+
+EXPLAIN
+select count(*)
+from CountryLanguage
+where
+(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
+                  WHERE Code = Country GROUP BY Code)
+OR
+(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
+                  WHERE Code = Country GROUP BY Code)
+order by Country;
+
+select count(*)
+from CountryLanguage
+where
+(Country, 10) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
+                  WHERE Code = Country GROUP BY Code)
+OR
+(Country, 11) IN (SELECT Code, COUNT(*) FROM CountryLanguage, Country
+                  WHERE Code = Country GROUP BY Code)
+order by Country;
+
+
+-- echo
+-- echo Q3.2:
+-- echo Countries whose capital is a city name that names more than one
+-- echo cities.
+-- echo MATERIALIZATION because the cost of single subquery execution is
+-- echo close to that of materializing the subquery.
+
+EXPLAIN
+select * from Country, City
+where capital = id and
+      (City.name in (SELECT name FROM City
+                     GROUP BY name HAVING Count(*) > 2) OR
+       capital is null);
+
+select * from Country, City
+where capital = id and
+      (City.name in (SELECT name FROM City
+                     GROUP BY name HAVING Count(*) > 2) OR
+       capital is null);
+
+-- echo
+-- echo Q3.3: MATERIALIZATION is 25 times faster than IN-EXISTS
+
+EXPLAIN
+SELECT Name
+FROM Country
+WHERE Country.Code NOT IN
+      (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
+
+SELECT Name
+FROM Country
+WHERE Country.Code NOT IN
+      (SELECT Country FROM City GROUP BY Name HAVING COUNT(Name) = 1);
+
+
+-- echo
+-- echo 4. Subqueries in the SELECT and HAVING clauses
+-- echo
+
+-- echo Q4.1m:
+-- echo Capital information about very big cities
+-- echo MATERIALIZATION
+EXPLAIN
+select Name, City.id in (select capital from Country where capital is not null) as is_capital
+from City
+where City.population > 10000000;
+
+select Name, City.id in (select capital from Country where capital is not null) as is_capital
+from City
+where City.population > 10000000;
+
+-- echo Q4.1e:
+-- echo IN-TO-EXISTS after adding an index to make the subquery re-execution
+-- echo efficient.
+
+create index CountryCapital on Country(capital);
+
+EXPLAIN
+select Name, City.id in (select capital from Country where capital is not null) as is_capital
+from City
+where City.population > 10000000;
+
+select Name, City.id in (select capital from Country where capital is not null) as is_capital
+from City
+where City.population > 10000000;
+
+drop index CountryCapital on Country;
+
+-- echo
+-- echo Q4.2:
+-- echo MATERIALIZATION
+# TODO: the cost estimates for subqueries in the HAVING clause need to be changed
+# to take into account that the subquery predicate is executed a number of times
+# proportional to the number of groups, not to the number of rows.
+EXPLAIN
+SELECT City.Name, City.Population
+FROM City JOIN Country ON City.Country = Country.Code
+GROUP BY City.Name
+HAVING City.Name IN (select Name from Country where population < 1000000);
+
+SELECT City.Name, City.Population
+FROM City JOIN Country ON City.Country = Country.Code
+GROUP BY City.Name
+HAVING City.Name IN (select Name from Country where population < 1000000);
+
+
+-- echo
+-- echo 5. Subqueries with UNION
+-- echo
+
+-- echo Q5.1:
+EXPLAIN
+SELECT * from City where (Name, 91) in
+(SELECT Name, round(Population/1000)
+ FROM City
+ WHERE Country = "IND" AND Population > 2500000
+UNION
+ SELECT Name, round(Population/1000)
+ FROM City
+ WHERE Country = "IND" AND Population < 100000);
+
+SELECT * from City where (Name, 91) in
+(SELECT Name, round(Population/1000)
+ FROM City
+ WHERE Country = "IND" AND Population > 2500000
+UNION
+ SELECT Name, round(Population/1000)
+ FROM City
+ WHERE Country = "IND" AND Population < 100000);
+
+set @@optimizer_switch='default';
+drop database world;
+-- echo
+
+
+-- echo
+-- echo TEST GROUP 2:
+-- echo Tests of various combinations of optimizer switches, types of queries,
+-- echo available indexes, column nullability, constness of tables/predicates.
+-- echo =====================================================================
+
+
+#TODO From Igor's review:
+#
+#2.1 Please add a case when two subqueries  are used in the where clause
+#(or in select) of a 2-way join.
+#The first subquery is accessed after the first table, while the second
+#is accessed after the second table.
+#
+#2.2. Please add a test case when one non-correlated subquery contains
+#another non-correlated subquery.
+#Consider 4 subcases:
+#   both subqueries are materialized
+#   IN-EXISTS transformations are applied to both subqueries
+#   outer subquery is materialized while the inner subquery is not
+#   (IN-EXISTS transformation is applied to it)
+#   inner subquery is materialized while the outer subquery is not
+#   (IN-EXISTS transformation is applied to it)
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -38,11 +38,14 @@ Item_subselect::Item_subselect():
  Item_result_field(), value_assigned(0), own_engine(0), thd(0), old_engine(0), 
  used_tables_cache(0), have_to_be_excluded(0), const_item_cache(1),
  inside_first_fix_fields(0), done_first_fix_fields(FALSE), 
-  substitution(0), expr_cache(0), engine(0), forced_const(FALSE), eliminated(FALSE),
+  expr_cache(0), forced_const(FALSE), substitution(0), engine(0), eliminated(FALSE),
  engine_changed(0), changed(0), is_correlated(FALSE)
 {
  DBUG_ENTER("Item_subselect::Item_subselect");
  DBUG_PRINT("enter", ("this: 0x%lx", (ulong) this));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
  with_subselect= 1;
  reset();
  /*
@@ -130,6 +133,10 @@ void Item_subselect::cleanup()
  value_assigned= 0;
  expr_cache= 0;
  forced_const= FALSE;
+  DBUG_PRINT("info", ("exec_counter: %d", exec_counter));
+#ifndef DBUG_OFF
+  exec_counter= 0;
+#endif
  DBUG_VOID_RETURN;
 }

@@ -548,7 +555,9 @@ bool Item_subselect::exec()
  DBUG_EXECUTE_IF("subselect_exec_fail", return 1;);

  res= engine->exec();
-
+#ifndef DBUG_OFF
+  ++exec_counter;
+#endif
  if (engine_changed)
  {
    engine_changed= 0;

--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -52,6 +52,17 @@ protected:
  
  bool inside_first_fix_fields;
  bool done_first_fix_fields;
+  Item *expr_cache;
+  /*
+    Set to TRUE if at optimization or execution time we determine that this
+    item's value is a constant. We need this member because it is not possible
+    to substitute 'this' with a constant item.
+  */
+  bool forced_const;
+#ifndef DBUG_OFF
+  /* Count the number of times this subquery predicate has been executed. */
+  uint exec_counter;
+#endif
 public:
  /* 
    Used inside Item_subselect::fix_fields() according to this scenario:
@@ -66,19 +77,13 @@ public:
        substitution= NULL;
      < Item_subselect::fix_fields
  */
+  /* TODO make this protected member again. */
  Item *substitution;
-  /* unit of subquery */
-  st_select_lex_unit *unit;
-  Item *expr_cache;
  /* engine that perform execution of subselect (single select or union) */
+  /* TODO make this protected member again. */
  subselect_engine *engine;
-  /*
-    Set to TRUE if at optimization or execution time we determine that this
-    item's value is a constant. We need this member because it is not possible
-    to substitute 'this' with a constant item.
-  */
-  bool forced_const;
-
+  /* unit of subquery */
+  st_select_lex_unit *unit;
  /* A reference from inside subquery predicate to somewhere outside of it */
  class Ref_to_outside : public Sql_alloc
  {

--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4324,8 +4324,6 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
  {
    JOIN *outer_join;
    JOIN *inner_join= this;
-    /* Number of (partial) rows of the outer JOIN filtered by the IN predicate. */
-    double outer_record_count;
    /* Number of unique value combinations filtered by the IN predicate. */
    double outer_lookup_keys;
    /* Cost and row count of the unmodified subquery. */
@@ -4345,38 +4343,37 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
    outer_join= unit->outer_select() ? unit->outer_select()->join : NULL;
    if (outer_join)
    {
-      uint outer_partial_plan_len;
+      /*
+        The index of the last JOIN_TAB in the outer JOIN where in_subs is
+        attached (pushed to).
+      */
+      uint max_outer_join_tab_idx;
      /*
        Make_cond_for_table is called for predicates only in the WHERE/ON
        clauses. In all other cases, predicates are not pushed to any
-        JOIN_TAB, and their joi_tab_idx remains MAX_TABLES. Such predicates
+        JOIN_TAB, and their join_tab_idx remains MAX_TABLES. Such predicates
        are evaluated for each complete row of the outer join.
      */
-      outer_partial_plan_len= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
-                               outer_join->table_count :
-                               in_subs->get_join_tab_idx() + 1;
-      outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
+      DBUG_ASSERT(outer_join->table_count > 0);
+      max_outer_join_tab_idx= (in_subs->get_join_tab_idx() == MAX_TABLES) ?
+                               outer_join->table_count - 1:
+                               in_subs->get_join_tab_idx();
+      /*
+        TODO:
+        Currently outer_lookup_keys is computed as the number of rows in
+        the partial join including the JOIN_TAB where the IN predicate is
+        pushed to. In the general case this is a gross overestimate because
+        due to caching we are interested only in the number of unique keys.
+        The search key may be formed by columns from much fewer than all
+        tables in the partial join. Example:
+        select * from t1, t2 where t1.c1 = t2.key AND t2.c2 IN (select ...);
+        With the join order (t1, t2), the number of unique lookup keys is
+        proportional to the number of unique values of t2.c2 in t1 join t2.
+      */
+      outer_join->get_partial_cost_and_fanout(max_outer_join_tab_idx,
                                              table_map(-1),
                                              &dummy,
-                                              &outer_record_count);
-
-      if (outer_join->table_count > outer_join->const_tables)
-      {
-        outer_join->get_partial_cost_and_fanout(outer_partial_plan_len,
-                                                in_subs->used_tables(),
-                                                &dummy,
-                                                &outer_lookup_keys);
-        /*
-        outer_lookup_keys= prev_record_reads(outer_join->best_positions,
-                                             outer_partial_plan_len,
-                                             in_subs->used_tables());
-        */
-      }
-      else
-      {
-        /* If all tables are constant, positions is undefined. */
-        outer_lookup_keys= 1;
-      }
+                                              &outer_lookup_keys);
    }
    else
    {
@@ -4384,17 +4381,8 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
        TODO: outer_join can be NULL for DELETE statements.
        How to compute its cost?
      */
-      outer_record_count= 1;
-      outer_lookup_keys=1;
+      outer_lookup_keys= 1;
    }
-    /*
-      There cannot be more lookup keys than the total number of records.
-      TODO: this a temporary solution until we find a better way to compute
-      get_partial_join_cost() and prev_record_reads() in a consitent manner,
-      where it is guaranteed that (outer_lookup_keys <= outer_record_count).
-    */
-    if (outer_lookup_keys > outer_record_count)
-      outer_lookup_keys= outer_record_count;

    /*
      B. Estimate the cost and number of records of the subquery both
@@ -4442,7 +4430,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
                                 write_cost * inner_record_count_1;

    materialize_strategy_cost= materialization_cost +
-                               outer_record_count * lookup_cost;
+                               outer_lookup_keys * lookup_cost;

    /* C.2 Compute the cost of the IN=>EXISTS strategy. */
    in_exists_strategy_cost= outer_lookup_keys * inner_read_time_2;
@@ -4452,6 +4440,14 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
      in_subs->in_strategy&= ~SUBS_MATERIALIZATION;
    else
      in_subs->in_strategy&= ~SUBS_IN_TO_EXISTS;
+
+    DBUG_PRINT("info",
+               ("mat_strategy_cost: %.2f, mat_cost: %.2f, write_cost: %.2f, lookup_cost: %.2f",
+                materialize_strategy_cost, materialization_cost, write_cost, lookup_cost));
+    DBUG_PRINT("info",
+               ("inx_strategy_cost: %.2f, inner_read_time_2: %.2f",
+                in_exists_strategy_cost, inner_read_time_2));
+    DBUG_PRINT("info",("outer_lookup_keys: %.2f", outer_lookup_keys));
  }

  /*
@@ -4507,9 +4503,9 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
        const_tables != table_count)
    {
      /*
-        The subquery was not reoptimized either because the user allowed only the
-        IN-EXISTS strategy, or because materialization was not possible based on
-        semantic analysis. Clenup the original plan and reoptimize.
+        The subquery was not reoptimized either because the user allowed only
+        the IN-EXISTS strategy, or because materialization was not possible
+        based on semantic analysis. Cleanup the original plan and reoptimize.
      */
      for (uint i= 0; i < table_count; i++)
      {

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5969,7 +5969,7 @@ void JOIN::get_partial_cost_and_fanout(uint end_tab_idx,
  }

  for (tab= first_depth_first_tab(this), i= const_tables;
-       tab;
+       (i <= end_tab_idx && tab);
       tab= next_depth_first_tab(this, tab), i++)
  {
    /*