Commit 4f4d03a4 authored by Martin Hansson's avatar Martin Hansson

Bug#51070: Query with a NOT IN subquery predicate returns a wrong result set

      
The EXISTS transformation has additional switches to catch the known corner
cases that appear when transforming an IN predicate into EXISTS. Guarded
conditions are used which are deactivated when a NULL value is seen in the
outer expression's row. When the inner query block supplies NULL values,
however, they are filtered out because no distinction is made between the
guarded conditions; guarded NOT x IS NULL conditions in the HAVING clause that
filter out NULL values cannot be de-activated in isolation from those that
match values or from the outer expression or NULL's.

The above problem is handled by making the guarded conditions remember whether
they have rejected a NULL value or not, and index access methods are taking
this into account as well. 

The bug consisted of 

1) Not resetting the property for every nested loop iteration on the inner
   query's result.

2) Not propagating the NULL result properly from inner query to IN optimizer.

3) A hack that may or may not have been needed at some point. According to a
   comment it was aimed to fix #2 by returning NULL when FALSE was actually
   the result. This caused failures when #2 was properly fixed. The hack is
   now removed.

The fix resolves all three points.
parent d6f6db6f
......@@ -77,6 +77,92 @@ Note 1249 Select 2 was reduced during optimization
CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN ( SELECT 1 ) );
CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) );
DROP VIEW v1, v2;
#
# Bug#51070: Query with a NOT IN subquery predicate returns a wrong
# result set
#
CREATE TABLE t1 ( a INT, b INT );
INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
CREATE TABLE t2 ( c INT, d INT );
INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 );
CREATE TABLE t3 ( e INT, f INT );
INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL );
CREATE TABLE t4 ( a INT );
INSERT INTO t4 VALUES (1), (2), (3);
CREATE TABLE t5 ( a INT );
INSERT INTO t5 VALUES (NULL), (2);
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
id select_type table type possible_keys key key_len ref rows Extra
x PRIMARY x x x x x x x x
x DEPENDENT SUBQUERY x x x x x x x x
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
a b
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL;
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN;
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN;
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 );
a b
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
id select_type table type possible_keys key key_len ref rows Extra
x PRIMARY x x x x x x x x
x DEPENDENT SUBQUERY x x x x x x x x
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
a b
EXPLAIN
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
id select_type table type possible_keys key key_len ref rows Extra
x PRIMARY x x x x x x x x
x DEPENDENT SUBQUERY x x x x x x x x
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
c d
EXPLAIN
SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
id select_type table type possible_keys key key_len ref rows Extra
x PRIMARY x x x x x x x x
x DEPENDENT SUBQUERY x x x x x x x x
SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
e f
EXPLAIN
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
id select_type table type possible_keys key key_len ref rows Extra
x PRIMARY x x x x x x x x
x DEPENDENT SUBQUERY x x x x x x x x
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
c d
SELECT * FROM t1 WHERE ( a, b ) NOT IN
( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 );
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 );
a b
1 NULL
2 NULL
SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 );
a b
1 NULL
2 NULL
DROP TABLE t1, t2, t3, t4, t5;
#
# End of 5.1 tests.
#
......@@ -74,6 +74,68 @@ CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN ( SELECT 1 ) );
CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) );
DROP VIEW v1, v2;
--echo #
--echo # Bug#51070: Query with a NOT IN subquery predicate returns a wrong
--echo # result set
--echo #
CREATE TABLE t1 ( a INT, b INT );
INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
CREATE TABLE t2 ( c INT, d INT );
INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 );
CREATE TABLE t3 ( e INT, f INT );
INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL );
CREATE TABLE t4 ( a INT );
INSERT INTO t4 VALUES (1), (2), (3);
CREATE TABLE t5 ( a INT );
INSERT INTO t5 VALUES (NULL), (2);
--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL;
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN;
SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN;
SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 );
--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
EXPLAIN
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
EXPLAIN
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
EXPLAIN
SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
EXPLAIN
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
SELECT * FROM t1 WHERE ( a, b ) NOT IN
( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 );
SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 );
SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 );
DROP TABLE t1, t2, t3, t4, t5;
--echo #
--echo # End of 5.1 tests.
......
......@@ -1751,6 +1751,76 @@ bool Item_in_optimizer::fix_fields(THD *thd, Item **ref)
}
/**
The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
predicates. The implementation works as follows.
For the current value of the outer expression
- If it contains only NULL values, the original (before rewrite by the
Item_in_subselect rewrite methods) inner subquery is non-correlated and
was previously executed, there is no need to re-execute it, and the
previous return value is returned.
- If it contains NULL values, check if there is a partial match for the
inner query block by evaluating it. For clarity we repeat here the
transformation previously performed on the sub-query. The expression
<tt>
( oc_1, ..., oc_n )
\<in predicate\>
( SELECT ic_1, ..., ic_n
FROM \<table\>
WHERE \<inner where\>
)
</tt>
was transformed into
<tt>
( oc_1, ..., oc_n )
\<in predicate\>
( SELECT ic_1, ..., ic_n
FROM \<table\>
WHERE \<inner where\> AND ... ( ic_k = oc_k OR ic_k IS NULL )
HAVING ... NOT ic_k IS NULL
)
</tt>
The evaluation will now proceed according to special rules set up
elsewhere. These rules include:
- The HAVING NOT \<inner column\> IS NULL conditions added by the
aforementioned rewrite methods will detect whether they evaluated (and
rejected) a NULL value and if so, will cause the subquery to evaluate
to NULL.
- The added WHERE and HAVING conditions are present only for those inner
columns that correspond to outer column that are not NULL at the moment.
- If there is an eligible index for executing the subquery, the special
access method "Full scan on NULL key" is employed which ensures that
the inner query will detect if there are NULL values resulting from the
inner query. This access method will quietly resort to table scan if it
needs to find NULL values as well.
- Under these conditions, the sub-query need only be evaluated in order to
find out whether it produced any rows.
- If it did, we know that there was a partial match since there are
NULL values in the outer row expression.
- If it did not, the result is FALSE or UNKNOWN. If at least one of the
HAVING sub-predicates rejected a NULL value corresponding to an outer
non-NULL, and hence the inner query block returns UNKNOWN upon
evaluation, there was a partial match and the result is UNKNOWN.
- If it contains no NULL values, the call is forwarded to the inner query
block.
@see Item_in_subselect::val_bool()
@see Item_is_not_null_test::val_int()
*/
longlong Item_in_optimizer::val_int()
{
bool tmp;
......@@ -1804,7 +1874,7 @@ longlong Item_in_optimizer::val_int()
all_left_cols_null= false;
}
if (!((Item_in_subselect*)args[1])->is_correlated &&
if (!item_subs->is_correlated &&
all_left_cols_null && result_for_null_param != UNKNOWN)
{
/*
......@@ -1818,8 +1888,11 @@ longlong Item_in_optimizer::val_int()
else
{
/* The subquery has to be evaluated */
(void) args[1]->val_bool_result();
null_value= !item_subs->engine->no_rows();
(void) item_subs->val_bool_result();
if (item_subs->engine->no_rows())
null_value= item_subs->null_value;
else
null_value= TRUE;
if (all_left_cols_null)
result_for_null_param= null_value;
}
......
......@@ -47,7 +47,7 @@ Item_subselect::Item_subselect():
item value is NULL if select_subselect not changed this value
(i.e. some rows will be found returned)
*/
null_value= 1;
null_value= TRUE;
}
......@@ -427,9 +427,9 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type)
void Item_singlerow_subselect::reset()
{
null_value= 1;
null_value= TRUE;
if (value)
value->null_value= 1;
value->null_value= TRUE;
}
......@@ -574,7 +574,7 @@ double Item_singlerow_subselect::val_real()
DBUG_ASSERT(fixed == 1);
if (!exec() && !value->null_value)
{
null_value= 0;
null_value= FALSE;
return value->val_real();
}
else
......@@ -589,7 +589,7 @@ longlong Item_singlerow_subselect::val_int()
DBUG_ASSERT(fixed == 1);
if (!exec() && !value->null_value)
{
null_value= 0;
null_value= FALSE;
return value->val_int();
}
else
......@@ -603,7 +603,7 @@ String *Item_singlerow_subselect::val_str(String *str)
{
if (!exec() && !value->null_value)
{
null_value= 0;
null_value= FALSE;
return value->val_str(str);
}
else
......@@ -618,7 +618,7 @@ my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value)
{
if (!exec() && !value->null_value)
{
null_value= 0;
null_value= FALSE;
return value->val_decimal(decimal_value);
}
else
......@@ -633,7 +633,7 @@ bool Item_singlerow_subselect::val_bool()
{
if (!exec() && !value->null_value)
{
null_value= 0;
null_value= FALSE;
return value->val_bool();
}
else
......@@ -651,7 +651,7 @@ Item_exists_subselect::Item_exists_subselect(st_select_lex *select_lex):
bool val_bool();
init(select_lex, new select_exists_subselect(this));
max_columns= UINT_MAX;
null_value= 0; //can't be NULL
null_value= FALSE; //can't be NULL
maybe_null= 0; //can't be NULL
value= 0;
DBUG_VOID_RETURN;
......@@ -814,15 +814,14 @@ double Item_in_subselect::val_real()
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
null_value= 0;
null_value= was_null= FALSE;
if (exec())
{
reset();
null_value= 1;
return 0;
}
if (was_null && !value)
null_value= 1;
null_value= TRUE;
return (double) value;
}
......@@ -835,15 +834,14 @@ longlong Item_in_subselect::val_int()
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
null_value= 0;
null_value= was_null= FALSE;
if (exec())
{
reset();
null_value= 1;
return 0;
}
if (was_null && !value)
null_value= 1;
null_value= TRUE;
return value;
}
......@@ -856,16 +854,15 @@ String *Item_in_subselect::val_str(String *str)
*/
DBUG_ASSERT(0);
DBUG_ASSERT(fixed == 1);
null_value= 0;
null_value= was_null= FALSE;
if (exec())
{
reset();
null_value= 1;
return 0;
}
if (was_null && !value)
{
null_value= 1;
null_value= TRUE;
return 0;
}
str->set((ulonglong)value, &my_charset_bin);
......@@ -876,20 +873,14 @@ String *Item_in_subselect::val_str(String *str)
bool Item_in_subselect::val_bool()
{
DBUG_ASSERT(fixed == 1);
null_value= 0;
null_value= was_null= FALSE;
if (exec())
{
reset();
/*
Must mark the IN predicate as NULL so as to make sure an enclosing NOT
predicate will return FALSE. See the comments in
subselect_uniquesubquery_engine::copy_ref_key for further details.
*/
null_value= 1;
return 0;
}
if (was_null && !value)
null_value= 1;
null_value= TRUE;
return value;
}
......@@ -900,16 +891,15 @@ my_decimal *Item_in_subselect::val_decimal(my_decimal *decimal_value)
method should not be used
*/
DBUG_ASSERT(0);
null_value= 0;
null_value= was_null= FALSE;
DBUG_ASSERT(fixed == 1);
if (exec())
{
reset();
null_value= 1;
return 0;
}
if (was_null && !value)
null_value= 1;
null_value= TRUE;
int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value);
return decimal_value;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment