BUG#20420: optimizer reports wrong keys on left join with IN

 When checking if an IN predicate can be evaluated using a key
 the optimizer makes sure that all the arguments of IN are of
 the same result type. To assure that it check whether 
 Item_func_in::array is filled in. 
 However Item_func_in::array is set if the types are
 the same AND all the arguments are compile time constants.
 Fixed by introducing Item_func_in::arg_types_compatible
 flag to allow correct checking of the desired condition.
parent 7fd3cd9e
......@@ -351,6 +351,50 @@ some_id
1
2
drop table t1;
CREATE TABLE t1 (a int, b int, PRIMARY KEY (a));
INSERT INTO t1 VALUES (1,1),(2,1),(3,1),(4,1),(5,1),(6,1);
CREATE TABLE t2 (a int, b int, PRIMARY KEY (a));
INSERT INTO t2 VALUES (3,2),(4,2),(100,100),(101,201),(102,102);
CREATE TABLE t3 (a int PRIMARY KEY);
INSERT INTO t3 VALUES (1),(2),(3),(4);
CREATE TABLE t4 (a int PRIMARY KEY,b int);
INSERT INTO t4 VALUES (1,1),(2,2),(1000,1000),(1001,1001),(1002,1002),
(1003,1003),(1004,1004);
EXPLAIN SELECT STRAIGHT_JOIN * FROM t3
JOIN t1 ON t3.a=t1.a
JOIN t2 ON t3.a=t2.a
JOIN t4 WHERE t4.a IN (t1.b, t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 index PRIMARY PRIMARY 4 NULL 4 Using index
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.t3.a 1
1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t3.a 1
1 SIMPLE t4 ALL PRIMARY NULL NULL NULL 7 Range checked for each record (index map: 0x1)
SELECT STRAIGHT_JOIN * FROM t3
JOIN t1 ON t3.a=t1.a
JOIN t2 ON t3.a=t2.a
JOIN t4 WHERE t4.a IN (t1.b, t2.b);
a a b a b a b
3 3 1 3 2 1 1
3 3 1 3 2 2 2
4 4 1 4 2 1 1
4 4 1 4 2 2 2
EXPLAIN SELECT STRAIGHT_JOIN
(SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
FROM t3, t1, t2
WHERE t3.a=t1.a AND t3.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 index PRIMARY PRIMARY 4 NULL 4 Using index
1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t3.a 1
1 PRIMARY t2 eq_ref PRIMARY PRIMARY 4 test.t3.a 1
2 DEPENDENT SUBQUERY t4 index NULL PRIMARY 4 NULL 7 Using where; Using index
SELECT STRAIGHT_JOIN
(SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
FROM t3, t1, t2
WHERE t3.a=t1.a AND t3.a=t2.a;
(SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
3
3
DROP TABLE t1,t2,t3,t4;
End of 5.0 tests
create table t1(f1 char(1));
insert into t1 values ('a'),('b'),('1');
......
......@@ -254,44 +254,43 @@ select some_id from t1 where some_id not in(-4,-1,-4);
select some_id from t1 where some_id not in(-4,-1,3423534,2342342);
drop table t1;
# TODO:Disabled until re-resolution of bug #20420 for 5.1.
# Results must be the same as in 5.0
##
## BUG#20420: optimizer reports wrong keys on left join with IN
##
#CREATE TABLE t1 (a int, b int, PRIMARY KEY (a));
#INSERT INTO t1 VALUES (1,1),(2,1),(3,1),(4,1),(5,1),(6,1);
#
#CREATE TABLE t2 (a int, b int, PRIMARY KEY (a));
#INSERT INTO t2 VALUES (3,2),(4,2),(100,100),(101,201),(102,102);
# BUG#20420: optimizer reports wrong keys on left join with IN
#
#CREATE TABLE t3 (a int PRIMARY KEY);
#INSERT INTO t3 VALUES (1),(2),(3),(4);
#
#CREATE TABLE t4 (a int PRIMARY KEY,b int);
#INSERT INTO t4 VALUES (1,1),(2,2),(1000,1000),(1001,1001),(1002,1002),(1003,1003),(1004,1004);
#
#EXPLAIN SELECT STRAIGHT_JOIN * FROM t3
# JOIN t1 ON t3.a=t1.a
# JOIN t2 ON t3.a=t2.a
# JOIN t4 WHERE t4.a IN (t1.b, t2.b);
#
#SELECT STRAIGHT_JOIN * FROM t3
# JOIN t1 ON t3.a=t1.a
# JOIN t2 ON t3.a=t2.a
# JOIN t4 WHERE t4.a IN (t1.b, t2.b);
#
#EXPLAIN SELECT STRAIGHT_JOIN
# (SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
# FROM t3, t1, t2
# WHERE t3.a=t1.a AND t3.a=t2.a;
#
#SELECT STRAIGHT_JOIN
# (SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
# FROM t3, t1, t2
# WHERE t3.a=t1.a AND t3.a=t2.a;
#
#DROP TABLE t1,t2,t3,t4;
CREATE TABLE t1 (a int, b int, PRIMARY KEY (a));
INSERT INTO t1 VALUES (1,1),(2,1),(3,1),(4,1),(5,1),(6,1);
CREATE TABLE t2 (a int, b int, PRIMARY KEY (a));
INSERT INTO t2 VALUES (3,2),(4,2),(100,100),(101,201),(102,102);
CREATE TABLE t3 (a int PRIMARY KEY);
INSERT INTO t3 VALUES (1),(2),(3),(4);
CREATE TABLE t4 (a int PRIMARY KEY,b int);
INSERT INTO t4 VALUES (1,1),(2,2),(1000,1000),(1001,1001),(1002,1002),
(1003,1003),(1004,1004);
EXPLAIN SELECT STRAIGHT_JOIN * FROM t3
JOIN t1 ON t3.a=t1.a
JOIN t2 ON t3.a=t2.a
JOIN t4 WHERE t4.a IN (t1.b, t2.b);
SELECT STRAIGHT_JOIN * FROM t3
JOIN t1 ON t3.a=t1.a
JOIN t2 ON t3.a=t2.a
JOIN t4 WHERE t4.a IN (t1.b, t2.b);
EXPLAIN SELECT STRAIGHT_JOIN
(SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
FROM t3, t1, t2
WHERE t3.a=t1.a AND t3.a=t2.a;
SELECT STRAIGHT_JOIN
(SELECT SUM(t4.a) FROM t4 WHERE t4.a IN (t1.b, t2.b))
FROM t3, t1, t2
WHERE t3.a=t1.a AND t3.a=t2.a;
DROP TABLE t1,t2,t3,t4;
--echo End of 5.0 tests
......
......@@ -2484,6 +2484,7 @@ void Item_func_in::fix_length_and_dec()
THD *thd= current_thd;
uint found_types= 0;
uint type_cnt= 0, i;
Item_result cmp_type;
left_result_type= args[0]->result_type();
found_types= collect_cmp_types(args, arg_count);
......@@ -2498,25 +2499,28 @@ void Item_func_in::fix_length_and_dec()
for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
{
if (found_types & 1 << i)
{
(type_cnt)++;
cmp_type= (Item_result) i;
}
}
if (type_cnt == 1)
{
if (cmp_type == STRING_RESULT &&
agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV, 1))
return;
arg_types_compatible= TRUE;
}
/*
Row item with NULLs inside can return NULL or FALSE =>
they can't be processed as static
*/
if (type_cnt == 1 && const_itm && !nulls_in_row())
{
uint tmp_type;
Item_result cmp_type;
/* Only one cmp type was found. Extract it here */
for (tmp_type= 0; found_types - 1; found_types>>= 1)
tmp_type++;
cmp_type= (Item_result)tmp_type;
switch (cmp_type) {
case STRING_RESULT:
if (agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV, 1))
return;
array=new in_string(arg_count - 1,(qsort2_cmp) srtcmp_in,
cmp_collation.collation);
break;
......
......@@ -1041,12 +1041,18 @@ public:
*/
in_vector *array;
bool have_null;
/*
true when all arguments of the IN clause are of compatible types
and can be used safely as comparisons for key conditions
*/
bool arg_types_compatible;
Item_result left_result_type;
cmp_item *cmp_items[5]; /* One cmp_item for each result type */
DTCollation cmp_collation;
Item_func_in(List<Item> &list)
:Item_func_opt_neg(list), array(0), have_null(0)
:Item_func_opt_neg(list), array(0), have_null(0),
arg_types_compatible(FALSE)
{
bzero(&cmp_items, sizeof(cmp_items));
allowed_arg_cols= 0; // Fetch this value from first argument
......
......@@ -4928,8 +4928,8 @@ static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func,
type. Tree won't be built for values with different result types,
so we check it here to avoid unnecessary work.
*/
if (!func->array)
break;
if (!func->arg_types_compatible)
break;
if (inv)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment