Commit eded6073 authored by Martin Hansson's avatar Martin Hansson

Bug#42846: wrong result returned for range scan when using

covering index
      
When two range predicates were combined under an OR
predicate, the algorithm tried to merge overlapping ranges
into one. But the case when a range overlapped several other
ranges was not handled. This lead to

1) ranges overlapping, which gave repeated results and 
2) a range that overlapped several other ranges was cut off.  

Fixed by 

1) Making sure that a range got an upper bound equal to the
next range with a greater minimum.
2) Removing a continue statement
parent bf711a74
...@@ -876,10 +876,10 @@ id select_type table type possible_keys key key_len ref rows Extra ...@@ -876,10 +876,10 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by
explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; explain select a1,a2,b, max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by
explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by
explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b; explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by 1 SIMPLE t1 range NULL idx_t1_1 163 NULL 17 Using where; Using index for group-by
...@@ -924,7 +924,7 @@ id select_type table type possible_keys key key_len ref rows Extra ...@@ -924,7 +924,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by 1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; explain select a1,a2,b, max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by 1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b; explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by 1 SIMPLE t2 range NULL idx_t2_1 163 NULL # Using where; Using index for group-by
......
...@@ -1219,3 +1219,182 @@ explain select * from t2 where a=1000 and b<11; ...@@ -1219,3 +1219,182 @@ explain select * from t2 where a=1000 and b<11;
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ref a a 5 const 502 Using where 1 SIMPLE t2 ref a a 5 const 502 Using where
drop table t1, t2; drop table t1, t2;
CREATE TABLE t1( a INT, b INT, KEY( a, b ) );
CREATE TABLE t2( a INT, b INT, KEY( a, b ) );
CREATE TABLE t3( a INT, b INT, KEY( a, b ) );
INSERT INTO t1( a, b )
VALUES (0, 1), (1, 2), (1, 4), (2, 3), (5, 0), (9, 7);
INSERT INTO t2( a, b )
VALUES ( 1, 1), ( 2, 1), ( 3, 1), ( 4, 1), ( 5, 1),
( 6, 1), ( 7, 1), ( 8, 1), ( 9, 1), (10, 1),
(11, 1), (12, 1), (13, 1), (14, 1), (15, 1),
(16, 1), (17, 1), (18, 1), (19, 1), (20, 1);
INSERT INTO t2 SELECT a, 2 FROM t2 WHERE b = 1;
INSERT INTO t2 SELECT a, 3 FROM t2 WHERE b = 1;
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t3
VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0),
(6, 0), (7, 0), (8, 0), (9, 0), (10, 0);
INSERT INTO t3 SELECT * FROM t3 WHERE a = 10;
INSERT INTO t3 SELECT * FROM t3 WHERE a = 10;
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 < a AND b = 3 OR
3 <= a;
a b
5 0
9 7
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 < a AND b = 3 OR
3 <= a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 <= a AND b = 3 OR
3 <= a;
a b
5 0
9 7
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 <= a AND b = 3 OR
3 <= a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 4 Using where; Using index
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
5 <= a AND b = 3 OR
3 <= a;
a b
5 0
9 7
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
5 <= a AND b = 3 OR
3 <= a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
3 <= a;
a b
5 0
9 7
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
3 <= a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 1 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
a b
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
10 1
11 1
12 1
13 1
14 1
15 1
15 3
16 1
16 3
17 1
17 3
18 1
18 3
19 1
19 3
20 1
EXPLAIN
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 1 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 10 NULL 50 Using where; Using index
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 2 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
a b
1 1
2 1
3 1
4 1
5 1
5 2
6 1
6 2
7 1
7 2
8 1
8 2
9 1
9 2
10 1
11 1
12 1
13 1
14 1
15 1
15 3
16 1
16 3
17 1
17 3
18 1
18 3
19 1
19 3
20 1
EXPLAIN
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 2 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 10 NULL 50 Using where; Using index
SELECT * FROM t3 WHERE
5 <= a AND a < 10 AND b = 3 OR
a < 5 OR
a < 10;
a b
1 0
2 0
3 0
4 0
5 0
6 0
7 0
8 0
9 0
EXPLAIN
SELECT * FROM t3 WHERE
5 <= a AND a < 10 AND b = 3 OR
a < 5 OR
a < 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t3 range a a 5 NULL 8 Using where; Using index
DROP TABLE t1, t2, t3;
...@@ -1046,3 +1046,128 @@ explain select * from t2 where a=1000 and b<11; ...@@ -1046,3 +1046,128 @@ explain select * from t2 where a=1000 and b<11;
drop table t1, t2; drop table t1, t2;
#
# Bug#42846: wrong result returned for range scan when using covering index
#
CREATE TABLE t1( a INT, b INT, KEY( a, b ) );
CREATE TABLE t2( a INT, b INT, KEY( a, b ) );
CREATE TABLE t3( a INT, b INT, KEY( a, b ) );
INSERT INTO t1( a, b )
VALUES (0, 1), (1, 2), (1, 4), (2, 3), (5, 0), (9, 7);
INSERT INTO t2( a, b )
VALUES ( 1, 1), ( 2, 1), ( 3, 1), ( 4, 1), ( 5, 1),
( 6, 1), ( 7, 1), ( 8, 1), ( 9, 1), (10, 1),
(11, 1), (12, 1), (13, 1), (14, 1), (15, 1),
(16, 1), (17, 1), (18, 1), (19, 1), (20, 1);
INSERT INTO t2 SELECT a, 2 FROM t2 WHERE b = 1;
INSERT INTO t2 SELECT a, 3 FROM t2 WHERE b = 1;
# To make range scan compelling to the optimizer
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t2 SELECT -1, -1 FROM t2;
INSERT INTO t3
VALUES (1, 0), (2, 0), (3, 0), (4, 0), (5, 0),
(6, 0), (7, 0), (8, 0), (9, 0), (10, 0);
# To make range scan compelling to the optimizer
INSERT INTO t3 SELECT * FROM t3 WHERE a = 10;
INSERT INTO t3 SELECT * FROM t3 WHERE a = 10;
#
# Problem#1 Test queries. Will give missing results unless Problem#1 is fixed.
# With one exception, they are independent of Problem#2.
#
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 < a AND b = 3 OR
3 <= a;
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 < a AND b = 3 OR
3 <= a;
# Query below: Tests both Problem#1 and Problem#2 (EXPLAIN differs as well)
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 <= a AND b = 3 OR
3 <= a;
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a < 5 OR
5 <= a AND b = 3 OR
3 <= a;
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
5 <= a AND b = 3 OR
3 <= a;
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
5 <= a AND b = 3 OR
3 <= a;
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
3 <= a;
EXPLAIN
SELECT * FROM t1 WHERE
3 <= a AND a <= 5 OR
3 <= a;
#
# Problem#2 Test queries.
# These queries will give missing results if Problem#1 is fixed.
# But Problem#1 also hides this bug.
#
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 1 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
EXPLAIN
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 1 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 2 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
EXPLAIN
SELECT * FROM t2 WHERE
5 <= a AND a < 10 AND b = 2 OR
15 <= a AND a < 20 AND b = 3
OR
1 <= a AND b = 1;
SELECT * FROM t3 WHERE
5 <= a AND a < 10 AND b = 3 OR
a < 5 OR
a < 10;
EXPLAIN
SELECT * FROM t3 WHERE
5 <= a AND a < 10 AND b = 3 OR
a < 5 OR
a < 10;
DROP TABLE t1, t2, t3;
...@@ -6512,6 +6512,63 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1) ...@@ -6512,6 +6512,63 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
} }
/**
Combine two range expression under a common OR. On a logical level, the
transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
Both expressions are assumed to be in the SEL_ARG format. In a logic sense,
theformat is reminiscent of DNF, since an expression such as the following
( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )
where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1
and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid
SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of
the first range and ranges must not overlap. It follows that they are also
ordered by maximum endpoints. Thus
( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3
Is a a valid SER_ARG expression for a key of at least 2 keyparts.
For simplicity, we will assume that expr2 is a single range predicate,
i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a
disjunction of several predicates by subsequently call key_or for each
disjunct.
The algorithm iterates over each disjunct of expr1, and for each disjunct
where the first keypart's range overlaps with the first keypart's range in
expr2:
If the predicates are equal for the rest of the keyparts, or if there are
no more, the range in expr2 has its endpoints copied in, and the SEL_ARG
node in expr2 is deallocated. If more ranges became connected in expr1, the
surplus is also dealocated. If they differ, two ranges are created.
- The range leading up to the overlap. Empty if endpoints are equal.
- The overlapping sub-range. May be the entire range if they are equal.
Finally, there may be one more range if expr2's first keypart's range has a
greater maximum endpoint than the last range in expr1.
For the overlapping sub-range, we recursively call key_or. Thus in order to
compute key_or of
(1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )
(2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )
We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the
first one, we simply hook on the condition for the second keypart from (1)
: 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4
< kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse
the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus
( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR
( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR
( 10 <= kp1 < 20 AND 4 < kp2 < 20 )
*/
static SEL_ARG * static SEL_ARG *
key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
{ {
...@@ -6663,7 +6720,21 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) ...@@ -6663,7 +6720,21 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
key1=key1->tree_delete(save); key1=key1->tree_delete(save);
} }
last->copy_min(tmp); last->copy_min(tmp);
if (last->copy_min(key2) || last->copy_max(key2)) bool full_range= last->copy_min(key2);
if (!full_range)
{
if (last->next && key2->cmp_max_to_min(last->next) >= 0)
{
last->max_value= last->next->min_value;
if (last->next->min_flag & NEAR_MIN)
last->max_flag&= ~NEAR_MAX;
else
last->max_flag|= NEAR_MAX;
}
else
full_range= last->copy_max(key2);
}
if (full_range)
{ // Full range { // Full range
key1->free_tree(); key1->free_tree();
for (; key2 ; key2=key2->next) for (; key2 ; key2=key2->next)
...@@ -6673,8 +6744,6 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) ...@@ -6673,8 +6744,6 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
return 0; return 0;
} }
} }
key2=key2->next;
continue;
} }
if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0) if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment