Commit 1230f3ad authored by sergefp@mysql.com's avatar sergefp@mysql.com

BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)

In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
where N>1000, was incorrectly converted to
  (-inf < X < c_min) OR (c_max < X)
Now this conversion is removed, we dont produce any range lists for such
conditions.
parent b5f814ab
......@@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b';
a hex(filler)
a 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
drop table t1,t2,t3;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
count(*)
1000
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index a a 5 NULL 1003 Using where; Using index
prepare stmt1 from @a;
execute stmt1;
a
11
13
15
drop table t1, t2;
......@@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b';
select a, hex(filler) from t1 where a not between 'b' and 'b';
drop table t1,t2,t3;
#
# BUG#21282
#
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
prepare stmt1 from @a;
execute stmt1;
drop table t1, t2;
# End of 5.0 tests
......@@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
if (func->array && func->cmp_type != ROW_RESULT)
{
/*
We get here for conditions in form "t.key NOT IN (c1, c2, ...)"
(where c{i} are constants).
Our goal is to produce a SEL_ARG graph that represents intervals:
We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
where c{i} are constants. Our goal is to produce a SEL_TREE that
represents intervals:
($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*)
where $MIN is either "-inf" or NULL.
The most straightforward way to handle NOT IN would be to convert
it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
optimizer to build SEL_ARG graph from that. However that will cause
the range optimizer to use O(N^2) memory (it's a bug, not filed),
and people do use big NOT IN lists (see BUG#15872). Also, for big
NOT IN lists constructing/using graph (*) does not make the query
faster.
So, we will handle NOT IN manually in the following way:
* if the number of entries in the NOT IN list is less then
NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
manually.
* Otherwise, we will construct a smaller graph: for
"t.key NOT IN (c1,...cN)" we construct a graph representing
($MIN < t.key) OR (cN < t.key) // here sequence of c_i is
// ordered.
A note about partially-covering indexes: for those (e.g. for
"a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
efficient):
Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
Combining the intervals in (*) together, we get:
(-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
i.e. actually we get intervals combined into one interval:
(-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
cause any problems.
The most straightforward way to produce it is to convert NOT IN
into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
analyzer to build SEL_TREE from that. The problem is that the
range analyzer will use O(N^2) memory (which is probably a bug),
and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
will run out of memory.
Another problem with big lists like (*) is that a big list is
unlikely to produce a good "range" access, while considering that
range access will require expensive CPU calculations (and for
MyISAM even index accesses). In short, big NOT IN lists are rarely
worth analyzing.
Considering the above, we'll handle NOT IN as follows:
* if the number of entries in the NOT IN list is less than
NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
* Otherwise, don't produce a SEL_TREE.
*/
#define NOT_IN_IGNORE_THRESHOLD 1000
MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root;
/*
......@@ -3656,9 +3648,9 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
Item *value_item= func->array->create_item();
param->thd->mem_root= tmp_root;
if (!value_item)
if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
break;
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
uint i=0;
do
......@@ -3677,45 +3669,39 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
tree= NULL;
break;
}
#define NOT_IN_IGNORE_THRESHOLD 1000
SEL_TREE *tree2;
if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
for (; i < func->array->count; i++)
{
for (; i < func->array->count; i++)
if (func->array->compare_elems(i, i-1))
{
if (func->array->compare_elems(i, i-1))
/* Get a SEL_TREE for "-inf < X < c_i" interval */
func->array->value_to_item(i, value_item);
tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
value_item, cmp_type);
if (!tree2)
{
/* Get a SEL_TREE for "-inf < X < c_i" interval */
func->array->value_to_item(i, value_item);
tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
value_item, cmp_type);
if (!tree2)
{
tree= NULL;
break;
}
tree= NULL;
break;
}
/* Change all intervals to be "c_{i-1} < X < c_i" */
for (uint idx= 0; idx < param->keys; idx++)
/* Change all intervals to be "c_{i-1} < X < c_i" */
for (uint idx= 0; idx < param->keys; idx++)
{
SEL_ARG *new_interval, *last_val;
if (((new_interval= tree2->keys[idx])) &&
((last_val= tree->keys[idx]->last())))
{
SEL_ARG *new_interval, *last_val;
if (((new_interval= tree2->keys[idx])) &&
((last_val= tree->keys[idx]->last())))
{
new_interval->min_value= last_val->max_value;
new_interval->min_flag= NEAR_MIN;
}
new_interval->min_value= last_val->max_value;
new_interval->min_flag= NEAR_MIN;
}
/*
The following doesn't try to allocate memory so no need to
check for NULL.
*/
tree= tree_or(param, tree, tree2);
}
/*
The following doesn't try to allocate memory so no need to
check for NULL.
*/
tree= tree_or(param, tree, tree2);
}
}
else
func->array->value_to_item(func->array->count - 1, value_item);
if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
{
......@@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
}
DBUG_RETURN(tree);
}
/* make a select tree of all keys in condition */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment