Commit a8d43972 authored by sergefp@mysql.com's avatar sergefp@mysql.com

BUG#26624: high mem usage (crash) in range optimizer

- Added PARAM::alloced_sel_args where we count the # of SEL_ARGs
  created by SEL_ARG tree cloning operations.
- Made the range analyzer to shortcut and not do any more cloning 
  if we've already created MAX_SEL_ARGS SEL_ARG objects in cloning.
- Added comments about space complexity of SEL_ARG-graph 
  representation.
parent ce3f1826
......@@ -701,4 +701,32 @@ d8c4177d225791924.30714720
d8c4177d2380fc201.39666693
d8c4177d24ccef970.14957924
DROP TABLE t1;
create table t1 (
c1 char(10), c2 char(10), c3 char(10), c4 char(10),
c5 char(10), c6 char(10), c7 char(10), c8 char(10),
c9 char(10), c10 char(10), c11 char(10), c12 char(10),
c13 char(10), c14 char(10), c15 char(10), c16 char(10),
index(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,c13,c14,c15,c16)
);
insert into t1 (c1) values ('1'),('1'),('1'),('1');
select * from t1 where
c1 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c2 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c3 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c4 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c5 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c6 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c7 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c8 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c9 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c10 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c11 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c12 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c13 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c14 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c15 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c16 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
;
c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16
drop table t1;
End of 4.1 tests
......@@ -563,4 +563,36 @@ SELECT s.oxid FROM t1 v, t1 s
DROP TABLE t1;
# BUG#26624 high mem usage (crash) in range optimizer (depends on order of fields in where)
create table t1 (
c1 char(10), c2 char(10), c3 char(10), c4 char(10),
c5 char(10), c6 char(10), c7 char(10), c8 char(10),
c9 char(10), c10 char(10), c11 char(10), c12 char(10),
c13 char(10), c14 char(10), c15 char(10), c16 char(10),
index(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,c13,c14,c15,c16)
);
insert into t1 (c1) values ('1'),('1'),('1'),('1');
# This must run without crash and fast:
select * from t1 where
c1 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c2 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c3 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c4 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c5 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c6 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c7 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c8 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c9 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c10 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c11 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c12 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c13 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c14 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c15 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
and c16 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
;
drop table t1;
--echo End of 4.1 tests
......@@ -128,6 +128,89 @@ static char is_null_string[2]= {1,0};
- get_quick_select() - Walk the SEL_ARG, materialize the key intervals,
and create QUICK_RANGE_SELECT object that will
read records within these intervals.
4. SPACE COMPLEXITY NOTES
SEL_ARG graph is a representation of an ordered disjoint sequence of
intervals over the ordered set of index tuple values.
For multi-part keys, one can construct a WHERE expression such that its
list of intervals will be of combinatorial size. Here is an example:
(keypart1 IN (1,2, ..., n1)) AND
(keypart2 IN (1,2, ..., n2)) AND
(keypart3 IN (1,2, ..., n3))
For this WHERE clause the list of intervals will have n1*n2*n3 intervals
of form
(keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
SEL_ARG graph structure aims to reduce the amount of required space by
"sharing" the elementary intervals when possible (the pic at the
beginning of this comment has examples of such sharing). The sharing may
prevent combinatorial blowup:
There are WHERE clauses that have combinatorial-size interval lists but
will be represented by a compact SEL_ARG graph.
Example:
(keypartN IN (1,2, ..., n1)) AND
...
(keypart2 IN (1,2, ..., n2)) AND
(keypart1 IN (1,2, ..., n3))
but not in all cases:
- There are WHERE clauses that do have a compact SEL_ARG-graph
representation but get_mm_tree() and its callees will construct a
graph of combinatorial size.
Example:
(keypart1 IN (1,2, ..., n1)) AND
(keypart2 IN (1,2, ..., n2)) AND
...
(keypartN IN (1,2, ..., n3))
- There are WHERE clauses for which the minimal possible SEL_ARG graph
representation will have combinatorial size.
Example:
By induction: Let's take any interval on some keypart in the middle:
kp15=1
Then let's AND it with this interval 'structure' from preceding and
following keyparts:
(kp14=c1 AND kp16=c3) OR keypart14=c2) (*)
We will obtain this SEL_ARG graph:
kp14 $ kp15 $ kp16
$ $
+---------+ $ +--------+ $ +---------+
| kp14=c1 |--$-->| kp15=1 |--$-->| kp16=c3 |
+---------+ $ +--------+ $ +---------+
| $ $
+---------+ $ +--------+ $
| kp14=c2 |--$-->| kp15=1 | $
+---------+ $ +--------+ $
$ $
Note that we had to duplicate "kp15=1" and there was no way to avoid
that.
The induction step: AND the obtained expression with another "wrapping"
expression like (*).
When the process ends because of the limit on max. number of keyparts
we'll have:
WHERE clause length is O(3*#max_keyparts)
SEL_ARG graph size is O(2^(#max_keyparts/2))
(it is also possible to construct a case where instead of 2 in 2^n we
have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
nodes)
We avoid consuming too much memory by setting a limit on the number of
SEL_ARG object we can construct during one range analysis invocation.
*/
class SEL_ARG :public Sql_alloc
......@@ -158,6 +241,8 @@ public:
enum leaf_color { BLACK,RED } color;
enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
enum { MAX_SEL_ARGS = 64000 };
SEL_ARG() {}
SEL_ARG(SEL_ARG &);
SEL_ARG(Field *,const char *,const char *);
......@@ -227,7 +312,8 @@ public:
return new SEL_ARG(field, part, min_value, arg->max_value,
min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
}
SEL_ARG *clone(SEL_ARG *new_parent,SEL_ARG **next);
SEL_ARG *clone(struct st_qsel_param *param, SEL_ARG *new_parent,
SEL_ARG **next);
bool copy_min(SEL_ARG* arg)
{ // Get overlapping range
......@@ -365,7 +451,7 @@ public:
{
return parent->left == this ? &parent->left : &parent->right;
}
SEL_ARG *clone_tree();
SEL_ARG *clone_tree(struct st_qsel_param *param);
};
......@@ -391,6 +477,8 @@ typedef struct st_qsel_param {
max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
bool quick; // Don't calulate possible keys
COND *cond;
/* Numbr of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
uint alloced_sel_args;
} PARAM;
static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field,
......@@ -413,8 +501,8 @@ static void print_quick(QUICK_SELECT *quick,const key_map* needed_reg);
static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
static SEL_ARG *key_or(PARAM *param, SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_and(PARAM *param, SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
SEL_ARG *key_tree,char *min_key,uint min_key_flag,
......@@ -424,6 +512,7 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b);
static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length);
/***************************************************************************
** Basic functions for SQL_SELECT and QUICK_SELECT
***************************************************************************/
......@@ -568,12 +657,17 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,char *min_value_,char *max_value_,
left=right= &null_element;
}
SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg)
SEL_ARG *SEL_ARG::clone(PARAM *param, SEL_ARG *new_parent, SEL_ARG **next_arg)
{
SEL_ARG *tmp;
/* Bail out if we have already generated too many SEL_ARGs */
if (++param->alloced_sel_args > MAX_SEL_ARGS)
return 0;
if (type != KEY_RANGE)
{
if (!(tmp= new SEL_ARG(type)))
if (!(tmp= new (param->mem_root) SEL_ARG(type)))
return 0; // out of memory
tmp->prev= *next_arg; // Link into next/prev chain
(*next_arg)->next=tmp;
......@@ -581,20 +675,20 @@ SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg)
}
else
{
if (!(tmp= new SEL_ARG(field,part, min_value,max_value,
min_flag, max_flag, maybe_flag)))
if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
min_flag, max_flag, maybe_flag)))
return 0; // OOM
tmp->parent=new_parent;
tmp->next_key_part=next_key_part;
if (left != &null_element)
tmp->left=left->clone(tmp,next_arg);
tmp->left=left->clone(param, tmp, next_arg);
tmp->prev= *next_arg; // Link into next/prev chain
(*next_arg)->next=tmp;
(*next_arg)= tmp;
if (right != &null_element)
if (!(tmp->right= right->clone(tmp,next_arg)))
if (!(tmp->right= right->clone(param, tmp, next_arg)))
return 0; // OOM
}
increment_use_count(1);
......@@ -672,11 +766,12 @@ static int sel_cmp(Field *field, char *a,char *b,uint8 a_flag,uint8 b_flag)
}
SEL_ARG *SEL_ARG::clone_tree()
SEL_ARG *SEL_ARG::clone_tree(PARAM *param)
{
SEL_ARG tmp_link,*next_arg,*root;
next_arg= &tmp_link;
root= clone((SEL_ARG *) 0, &next_arg);
if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
return 0;
next_arg->next=0; // Fix last link
tmp_link.next->prev=0; // Fix first link
if (root) // If not OOM
......@@ -890,6 +985,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
param.real_keynr[param.keys++]=idx;
}
param.key_parts_end=key_parts;
param.alloced_sel_args= 0;
if ((tree=get_mm_tree(&param,cond)))
{
......@@ -991,7 +1087,8 @@ static SEL_TREE *get_mm_tree(PARAM *param,COND *cond)
while ((item=li++))
{
SEL_TREE *new_tree=get_mm_tree(param,item);
if (param->thd->is_fatal_error)
if (param->thd->is_fatal_error ||
param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
DBUG_RETURN(0); // out of memory
tree=tree_and(param,tree,new_tree);
if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
......@@ -1524,7 +1621,7 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
tree1->type=SEL_TREE::KEY_SMALLER;
DBUG_RETURN(tree1);
}
/* Join the trees key per key */
SEL_ARG **key1,**key2,**end;
for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
......@@ -1537,7 +1634,7 @@ tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
flag|=CLONE_KEY1_MAYBE;
if (*key2 && !(*key2)->simple_key())
flag|=CLONE_KEY2_MAYBE;
*key1=key_and(*key1,*key2,flag);
*key1=key_and(param, *key1, *key2, flag);
if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
{
tree1->type= SEL_TREE::IMPOSSIBLE;
......@@ -1574,7 +1671,7 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
key1 != end ; key1++,key2++)
{
*key1=key_or(*key1,*key2);
*key1= key_or(param, *key1, *key2);
if (*key1)
{
result=tree1; // Added to tree1
......@@ -1590,14 +1687,14 @@ tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
/* And key trees where key1->part < key2 -> part */
static SEL_ARG *
and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
and_all_keys(PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
SEL_ARG *next;
ulong use_count=key1->use_count;
if (key1->elements != 1)
{
key2->use_count+=key1->elements-1;
key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p?
key2->increment_use_count((int) key1->elements-1);
}
if (key1->type == SEL_ARG::MAYBE_KEY)
......@@ -1609,7 +1706,7 @@ and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
{
if (next->next_key_part)
{
SEL_ARG *tmp=key_and(next->next_key_part,key2,clone_flag);
SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
{
key1=key1->tree_delete(next);
......@@ -1618,6 +1715,8 @@ and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
next->next_key_part=tmp;
if (use_count)
next->increment_use_count(use_count);
if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
break;
}
else
next->next_key_part=key2;
......@@ -1644,7 +1743,7 @@ and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
*/
static SEL_ARG *
key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
key_and(PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{
if (!key1)
return key2;
......@@ -1660,9 +1759,9 @@ key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
// key1->part < key2->part
key1->use_count--;
if (key1->use_count > 0)
if (!(key1= key1->clone_tree()))
if (!(key1= key1->clone_tree(param)))
return 0; // OOM
return and_all_keys(key1,key2,clone_flag);
return and_all_keys(param, key1, key2, clone_flag);
}
if (((clone_flag & CLONE_KEY2_MAYBE) &&
......@@ -1680,14 +1779,14 @@ key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
if (key1->use_count > 1)
{
key1->use_count--;
if (!(key1=key1->clone_tree()))
if (!(key1=key1->clone_tree(param)))
return 0; // OOM
key1->use_count++;
}
if (key1->type == SEL_ARG::MAYBE_KEY)
{ // Both are maybe key
key1->next_key_part=key_and(key1->next_key_part,key2->next_key_part,
clone_flag);
key1->next_key_part=key_and(param, key1->next_key_part,
key2->next_key_part, clone_flag);
if (key1->next_key_part &&
key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
return key1;
......@@ -1698,7 +1797,7 @@ key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
if (key2->next_key_part)
{
key1->use_count--; // Incremented in and_all_keys
return and_all_keys(key1,key2,clone_flag);
return and_all_keys(param, key1, key2, clone_flag);
}
key2->use_count--; // Key2 doesn't have a tree
}
......@@ -1727,7 +1826,8 @@ key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
}
else if (get_range(&e2,&e1,key2))
continue;
SEL_ARG *next=key_and(e1->next_key_part,e2->next_key_part,clone_flag);
SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
clone_flag);
e1->increment_use_count(1);
e2->increment_use_count(1);
if (!next || next->type != SEL_ARG::IMPOSSIBLE)
......@@ -1775,7 +1875,7 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
static SEL_ARG *
key_or(SEL_ARG *key1,SEL_ARG *key2)
key_or(PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
{
if (!key1)
{
......@@ -1823,7 +1923,7 @@ key_or(SEL_ARG *key1,SEL_ARG *key2)
{
swap_variables(SEL_ARG *,key1,key2);
}
if (key1->use_count > 0 || !(key1=key1->clone_tree()))
if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
return 0; // OOM
}
......@@ -1967,7 +2067,7 @@ key_or(SEL_ARG *key1,SEL_ARG *key2)
{ // tmp.min. <= x <= tmp.max
tmp->maybe_flag|= key.maybe_flag;
key.increment_use_count(key1->use_count+1);
tmp->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
if (!cmp) // Key2 is ready
break;
key.copy_max_to_min(tmp);
......@@ -1998,7 +2098,7 @@ key_or(SEL_ARG *key1,SEL_ARG *key2)
tmp->increment_use_count(key1->use_count+1);
/* Increment key count as it may be used for next loop */
key.increment_use_count(1);
new_arg->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
key1=key1->insert(new_arg);
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment