Commit 1230f3ad authored by sergefp@mysql.com's avatar sergefp@mysql.com

BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>)"

In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
where N>1000, was incorrectly converted to
  (-inf < X < c_min) OR (c_max < X)
This conversion has now been removed: we don't produce any range lists for
such conditions.
parent b5f814ab
...@@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b'; ...@@ -838,3 +838,25 @@ select a, hex(filler) from t1 where a not between 'b' and 'b';
a hex(filler) a hex(filler)
a 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 a 0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
drop table t1,t2,t3; drop table t1,t2,t3;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
count(*)
1000
set @a=concat(@a, ')');
insert into t2 values (11),(13),(15);
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 index a a 5 NULL 1003 Using where; Using index
prepare stmt1 from @a;
execute stmt1;
a
11
13
15
drop table t1, t2;
...@@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b'; ...@@ -656,3 +656,28 @@ explain select * from t1 where a not between 'b' and 'b';
select a, hex(filler) from t1 where a not between 'b' and 'b'; select a, hex(filler) from t1 where a not between 'b' and 'b';
drop table t1,t2,t3; drop table t1,t2,t3;
#
# BUG#21282
#
# Helper table holding the digits 0..9.
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
# Fill the indexed table t2 with the 1000 even values 0, 2, ..., 1998
# (a three-way cross join of the digit table).
create table t2 (a int, key(a));
insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
# Assemble "select ... where a NOT IN(0,<every value in t2>)" in @a, i.e. a
# constant NOT IN list with more than 1000 entries.
set @a="select * from t2 force index (a) where a NOT IN(0";
select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
set @a=concat(@a, ')');
# Add three odd values that do not appear in the NOT IN list; they are the
# only rows the generated query is expected to return.
insert into t2 values (11),(13),(15);
# Run EXPLAIN on the generated query first, then the query itself.
set @b= concat("explain ", @a);
prepare stmt1 from @b;
execute stmt1;
prepare stmt1 from @a;
execute stmt1;
drop table t1, t2;
# End of 5.0 tests
...@@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, ...@@ -3608,41 +3608,33 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
if (func->array && func->cmp_type != ROW_RESULT) if (func->array && func->cmp_type != ROW_RESULT)
{ {
/* /*
We get here for conditions in form "t.key NOT IN (c1, c2, ...)" We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
(where c{i} are constants). where c{i} are constants. Our goal is to produce a SEL_TREE that
Our goal is to produce a SEL_ARG graph that represents intervals: represents intervals:
($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*) ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ... (*)
where $MIN is either "-inf" or NULL. where $MIN is either "-inf" or NULL.
The most straightforward way to handle NOT IN would be to convert The most straightforward way to produce it is to convert NOT IN
it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
optimizer to build SEL_ARG graph from that. However that will cause analyzer to build SEL_TREE from that. The problem is that the
the range optimizer to use O(N^2) memory (it's a bug, not filed), range analyzer will use O(N^2) memory (which is probably a bug),
and people do use big NOT IN lists (see BUG#15872). Also, for big and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
NOT IN lists constructing/using graph (*) does not make the query will run out of memory.
faster.
Another problem with big lists like (*) is that a big list is
So, we will handle NOT IN manually in the following way: unlikely to produce a good "range" access, while considering that
* if the number of entries in the NOT IN list is less then range access will require expensive CPU calculations (and for
NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*) MyISAM even index accesses). In short, big NOT IN lists are rarely
manually. worth analyzing.
* Otherwise, we will construct a smaller graph: for
"t.key NOT IN (c1,...cN)" we construct a graph representing Considering the above, we'll handle NOT IN as follows:
($MIN < t.key) OR (cN < t.key) // here sequence of c_i is * if the number of entries in the NOT IN list is less than
// ordered. NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
* Otherwise, don't produce a SEL_TREE.
A note about partially-covering indexes: for those (e.g. for
"a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
efficient):
Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
Combining the intervals in (*) together, we get:
(-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
i.e. actually we get intervals combined into one interval:
(-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
cause any problems.
*/ */
#define NOT_IN_IGNORE_THRESHOLD 1000
MEM_ROOT *tmp_root= param->mem_root; MEM_ROOT *tmp_root= param->mem_root;
param->thd->mem_root= param->old_root; param->thd->mem_root= param->old_root;
/* /*
...@@ -3656,9 +3648,9 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, ...@@ -3656,9 +3648,9 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
Item *value_item= func->array->create_item(); Item *value_item= func->array->create_item();
param->thd->mem_root= tmp_root; param->thd->mem_root= tmp_root;
if (!value_item) if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
break; break;
/* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */ /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */
uint i=0; uint i=0;
do do
...@@ -3677,45 +3669,39 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, ...@@ -3677,45 +3669,39 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
tree= NULL; tree= NULL;
break; break;
} }
#define NOT_IN_IGNORE_THRESHOLD 1000
SEL_TREE *tree2; SEL_TREE *tree2;
if (func->array->count < NOT_IN_IGNORE_THRESHOLD) for (; i < func->array->count; i++)
{ {
for (; i < func->array->count; i++) if (func->array->compare_elems(i, i-1))
{ {
if (func->array->compare_elems(i, i-1)) /* Get a SEL_TREE for "-inf < X < c_i" interval */
func->array->value_to_item(i, value_item);
tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
value_item, cmp_type);
if (!tree2)
{ {
/* Get a SEL_TREE for "-inf < X < c_i" interval */ tree= NULL;
func->array->value_to_item(i, value_item); break;
tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC, }
value_item, cmp_type);
if (!tree2)
{
tree= NULL;
break;
}
/* Change all intervals to be "c_{i-1} < X < c_i" */ /* Change all intervals to be "c_{i-1} < X < c_i" */
for (uint idx= 0; idx < param->keys; idx++) for (uint idx= 0; idx < param->keys; idx++)
{
SEL_ARG *new_interval, *last_val;
if (((new_interval= tree2->keys[idx])) &&
((last_val= tree->keys[idx]->last())))
{ {
SEL_ARG *new_interval, *last_val; new_interval->min_value= last_val->max_value;
if (((new_interval= tree2->keys[idx])) && new_interval->min_flag= NEAR_MIN;
((last_val= tree->keys[idx]->last())))
{
new_interval->min_value= last_val->max_value;
new_interval->min_flag= NEAR_MIN;
}
} }
/*
The following doesn't try to allocate memory so no need to
check for NULL.
*/
tree= tree_or(param, tree, tree2);
} }
/*
The following doesn't try to allocate memory so no need to
check for NULL.
*/
tree= tree_or(param, tree, tree2);
} }
} }
else
func->array->value_to_item(func->array->count - 1, value_item);
if (tree && tree->type != SEL_TREE::IMPOSSIBLE) if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
{ {
...@@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func, ...@@ -3780,7 +3766,6 @@ static SEL_TREE *get_func_mm_tree(PARAM *param, Item_func *cond_func,
} }
DBUG_RETURN(tree); DBUG_RETURN(tree);
} }
/* make a select tree of all keys in condition */ /* make a select tree of all keys in condition */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment