Commit 9b475ee3 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-8289: Semijoin inflates number of rows in query result

- Make semi-join optimizer not to choose LooseScan
  when 1) the index is not covered and 2) full index
  scan will be required.

- Make sure that the code in make_join_select() that may change
  full index scan into a range scan is not invoked when the table
  uses full scan.
parent 1bfe4da1
......@@ -1262,5 +1262,38 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
drop table t3,t2,t1;
set optimizer_search_depth=@tmp7474;
#
#
#
CREATE TABLE t1 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE t2 (
id int(16) NOT NULL AUTO_INCREMENT,
t3_id int(16) NOT NULL DEFAULT '0',
t1_id int(16) NOT NULL DEFAULT '0',
PRIMARY KEY (id),
KEY t3_idx (t3_id),
KEY t1_idx (t1_id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE t3 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t3 VALUES (1);
INSERT INTO t2 VALUES (1, 1, 1);
INSERT INTO t2 VALUES (2, 1, 2);
INSERT INTO t2 VALUES (3, 1, 2);
INSERT INTO t2 VALUES (4, 1, 1);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SELECT * FROM t1 WHERE t1.id IN (
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
);
id
1
2
drop table t1,t2,t3;
# This must be the last in the file:
set optimizer_switch=@subselect_sj2_tmp;
......@@ -1277,6 +1277,39 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
drop table t3,t2,t1;
set optimizer_search_depth=@tmp7474;
#
#
#
CREATE TABLE t1 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE t2 (
id int(16) NOT NULL AUTO_INCREMENT,
t3_id int(16) NOT NULL DEFAULT '0',
t1_id int(16) NOT NULL DEFAULT '0',
PRIMARY KEY (id),
KEY t3_idx (t3_id),
KEY t1_idx (t1_id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE t3 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t3 VALUES (1);
INSERT INTO t2 VALUES (1, 1, 1);
INSERT INTO t2 VALUES (2, 1, 2);
INSERT INTO t2 VALUES (3, 1, 2);
INSERT INTO t2 VALUES (4, 1, 1);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SELECT * FROM t1 WHERE t1.id IN (
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
);
id
1
2
drop table t1,t2,t3;
# This must be the last in the file:
set optimizer_switch=@subselect_sj2_tmp;
#
......
......@@ -1264,6 +1264,39 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY T2_0_ ref FK_T2_T1Id FK_T2_T1Id 8 test.T2_1_.t1idref 1 Using index; End temporary
drop table t3,t2,t1;
set optimizer_search_depth=@tmp7474;
#
#
#
CREATE TABLE t1 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE t2 (
id int(16) NOT NULL AUTO_INCREMENT,
t3_id int(16) NOT NULL DEFAULT '0',
t1_id int(16) NOT NULL DEFAULT '0',
PRIMARY KEY (id),
KEY t3_idx (t3_id),
KEY t1_idx (t1_id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE t3 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t3 VALUES (1);
INSERT INTO t2 VALUES (1, 1, 1);
INSERT INTO t2 VALUES (2, 1, 2);
INSERT INTO t2 VALUES (3, 1, 2);
INSERT INTO t2 VALUES (4, 1, 1);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SELECT * FROM t1 WHERE t1.id IN (
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
);
id
1
2
drop table t1,t2,t3;
# This must be the last in the file:
set optimizer_switch=@subselect_sj2_tmp;
set optimizer_switch=default;
......
......@@ -1391,5 +1391,44 @@ eval explain $query;
drop table t3,t2,t1;
set optimizer_search_depth=@tmp7474;
--echo #
--echo #
--echo #
CREATE TABLE t1 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE t2 (
id int(16) NOT NULL AUTO_INCREMENT,
t3_id int(16) NOT NULL DEFAULT '0',
t1_id int(16) NOT NULL DEFAULT '0',
PRIMARY KEY (id),
KEY t3_idx (t3_id),
KEY t1_idx (t1_id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE t3 (
id int(16) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t3 VALUES (1);
INSERT INTO t2 VALUES (1, 1, 1);
INSERT INTO t2 VALUES (2, 1, 2);
INSERT INTO t2 VALUES (3, 1, 2);
INSERT INTO t2 VALUES (4, 1, 1);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SELECT * FROM t1 WHERE t1.id IN (
SELECT t2.t1_id FROM t3 JOIN t2 ON t3.id = t2.t3_id WHERE t3.id = 1
);
drop table t1,t2,t3;
--echo # This must be the last in the file:
set optimizer_switch=@subselect_sj2_tmp;
......@@ -4361,6 +4361,74 @@ int init_dups_weedout(JOIN *join, uint first_table, int first_fanout_table, uint
}
/*
@brief
Set up semi-join Loose Scan strategy for execution
@detail
Other strategies are done in setup_semijoin_dups_elimination(),
however, we need to set up Loose Scan earlier, before make_join_select is
called. This is to prevent make_join_select() from switching full index
scans into quick selects (which will break Loose Scan access).
@return
0 OK
1 Error
*/
int setup_semijoin_loosescan(JOIN *join)
{
uint i;
DBUG_ENTER("setup_semijoin_loosescan");
POSITION *pos= join->best_positions + join->const_tables;
for (i= join->const_tables ; i < join->top_join_tab_count; )
{
JOIN_TAB *tab=join->join_tab + i;
switch (pos->sj_strategy) {
case SJ_OPT_MATERIALIZE:
case SJ_OPT_MATERIALIZE_SCAN:
i+= 1; /* join tabs are embedded in the nest */
pos += pos->n_sj_tables;
break;
case SJ_OPT_LOOSE_SCAN:
{
/* We jump from the last table to the first one */
tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
/* LooseScan requires records to be produced in order */
if (tab->select && tab->select->quick)
tab->select->quick->need_sorted_output();
for (uint j= i; j < i + pos->n_sj_tables; j++)
join->join_tab[j].inside_loosescan_range= TRUE;
/* Calculate key length */
uint keylen= 0;
uint keyno= pos->loosescan_picker.loosescan_key;
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
tab->loosescan_key= keyno;
tab->loosescan_key_len= keylen;
if (pos->n_sj_tables > 1)
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
i+= pos->n_sj_tables;
pos+= pos->n_sj_tables;
break;
}
default:
{
i++;
pos++;
break;
}
}
}
DBUG_RETURN(FALSE);
}
/*
Setup the strategies to eliminate semi-join duplicates.
......@@ -4469,8 +4537,6 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
for (i= join->const_tables ; i < join->top_join_tab_count; )
{
JOIN_TAB *tab=join->join_tab + i;
//POSITION *pos= join->best_positions + i;
uint keylen, keyno;
switch (pos->sj_strategy) {
case SJ_OPT_MATERIALIZE:
case SJ_OPT_MATERIALIZE_SCAN:
......@@ -4480,26 +4546,7 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
break;
case SJ_OPT_LOOSE_SCAN:
{
/* We jump from the last table to the first one */
tab->loosescan_match_tab= tab + pos->n_sj_tables - 1;
/* LooseScan requires records to be produced in order */
if (tab->select && tab->select->quick)
tab->select->quick->need_sorted_output();
for (uint j= i; j < i + pos->n_sj_tables; j++)
join->join_tab[j].inside_loosescan_range= TRUE;
/* Calculate key length */
keylen= 0;
keyno= pos->loosescan_picker.loosescan_key;
for (uint kp=0; kp < pos->loosescan_picker.loosescan_parts; kp++)
keylen += tab->table->key_info[keyno].key_part[kp].store_length;
tab->loosescan_key= keyno;
tab->loosescan_key_len= keylen;
if (pos->n_sj_tables > 1)
tab[pos->n_sj_tables - 1].do_firstmatch= tab;
/* Setup already handled by setup_semijoin_loosescan */
i+= pos->n_sj_tables;
pos+= pos->n_sj_tables;
break;
......
......@@ -194,8 +194,6 @@ public:
PREV_BITS(key_part_map, max_loose_keypart+1) && // (3)
!key_uses_partial_cols(s->table->s, key))
{
/* Ok, can use the strategy */
part1_conds_met= TRUE;
if (s->quick && s->quick->index == key &&
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
......@@ -204,6 +202,12 @@ public:
}
DBUG_PRINT("info", ("Can use LooseScan scan"));
if (found_part & 1)
{
/* Can use LooseScan on ref access if the first key part is bound */
part1_conds_met= TRUE;
}
/*
Check if this is a special case where there are no usable bound
IN-equalities, i.e. we have
......@@ -211,11 +215,13 @@ public:
outer_expr IN (SELECT innertbl.key FROM ...)
and outer_expr cannot be evaluated yet, so it's actually full
index scan and not a ref access
index scan and not a ref access.
We can do full index scan if it uses index-only.
*/
if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
s->table->covering_keys.is_set(key))
{
part1_conds_met= TRUE;
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */
......@@ -383,6 +389,7 @@ public:
bool create_sj_weedout_tmp_table(THD *thd);
};
int setup_semijoin_loosescan(JOIN *join);
int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
void destroy_sj_tmp_tables(JOIN *join);
......
......@@ -1530,6 +1530,9 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S
/* Cache constant expressions in WHERE, HAVING, ON clauses. */
cache_const_exprs();
if (setup_semijoin_loosescan(this))
DBUG_RETURN(1);
if (make_join_select(this, select, conds))
{
zero_result_cause=
......@@ -9676,9 +9679,14 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
Check again if we should use an index.
We could have used an column from a previous table in
the index if we are using limit and this is the first table
(1) - Don't switch the used index if we are using semi-join
LooseScan on this table. Using different index will not
produce the desired ordering and de-duplication.
*/
if (!tab->table->is_filled_at_execution() &&
!tab->loosescan_match_tab && // (1)
((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) ||
(!tab->const_keys.is_clear_all() && i == join->const_tables &&
join->unit->select_limit_cnt <
......
......@@ -717,8 +717,7 @@ public:
struct st_position *pos,
struct st_position *loose_scan_pos);
friend bool get_best_combination(JOIN *join);
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
friend int setup_semijoin_loosescan(JOIN *join);
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment