Bug#27573: MIN() on an indexed column which is always NULL sets _other_ results

to NULL

For queries of the form SELECT MIN(key_part_k) FROM t1 
WHERE key_part_1 = const and ... and key_part_k-1 = const,
the opt_sum_query optimization tries to
use an index to substitute MIN/MAX functions with their values according
to the following rules:
1) Insert the minimum non-null values where the WHERE clause still matches, or
3) A row of nulls

However, the correct semantics requires that there is a third case 2)
such that a NULL value is substituted if there are only NULL values for 
key_part_k.

The patch modifies opt_sum_query() to handle this missing case.
parent 54bc9c3d
set autocommit=1;
reset master;
create table bug16206 (a int);
insert into bug16206 values(1);
start transaction;
insert into bug16206 values(2);
commit;
show binlog events;
Log_name Pos Event_type Server_id End_log_pos Info
f n Format_desc 1 n Server ver: VERSION, Binlog ver: 4
f n Query 1 n use `test`; create table bug16206 (a int)
f n Query 1 n use `test`; insert into bug16206 values(1)
f n Query 1 n use `test`; insert into bug16206 values(2)
drop table bug16206;
reset master;
create table bug16206 (a int) engine= bdb;
insert into bug16206 values(0);
insert into bug16206 values(1);
start transaction;
insert into bug16206 values(2);
commit;
insert into bug16206 values(3);
show binlog events;
Log_name Pos Event_type Server_id End_log_pos Info
f n Format_desc 1 n Server ver: VERSION, Binlog ver: 4
f n Query 1 n use `test`; create table bug16206 (a int) engine= bdb
f n Query 1 n use `test`; insert into bug16206 values(0)
f n Query 1 n use `test`; insert into bug16206 values(1)
f n Query 1 n use `test`; BEGIN
f n Query 1 n use `test`; insert into bug16206 values(2)
f n Query 1 n use `test`; COMMIT
f n Query 1 n use `test`; insert into bug16206 values(3)
drop table bug16206;
set autocommit=0;
End of 5.0 tests
...@@ -1321,4 +1321,51 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50; ...@@ -1321,4 +1321,51 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50;
a average a average
1 32768.5000 1 32768.5000
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1 ( a INT, b INT, KEY(a) );
INSERT INTO t1 VALUES (NULL, 1), (NULL, 2);
EXPLAIN SELECT MIN(a), MIN(b) FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
SELECT MIN(a), MIN(b) FROM t1;
MIN(a) MIN(b)
NULL 1
CREATE TABLE t2( a INT, b INT, c INT, KEY(a, b) );
INSERT INTO t2 ( a, b, c ) VALUES ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 );
EXPLAIN SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ref a a 5 const 2 Using where
SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
MIN(b) MIN(c)
3 2
CREATE TABLE t3 (a INT, b INT, c int, KEY(a, b));
INSERT INTO t3 VALUES (1, NULL, 1), (2, NULL, 2), (2, NULL, 2), (3, NULL, 3);
EXPLAIN SELECT MIN(a), MIN(b) FROM t3 where a = 2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
SELECT MIN(a), MIN(b) FROM t3 where a = 2;
MIN(a) MIN(b)
2 NULL
CREATE TABLE t4 (a INT, b INT, c int, KEY(a, b));
INSERT INTO t4 VALUES (1, 1, 1), (2, NULL, 2), (2, NULL, 2), (3, 1, 3);
EXPLAIN SELECT MIN(a), MIN(b) FROM t4 where a = 2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
SELECT MIN(a), MIN(b) FROM t4 where a = 2;
MIN(a) MIN(b)
2 NULL
SELECT MIN(b), min(c) FROM t4 where a = 2;
MIN(b) min(c)
NULL 2
CREATE TABLE t5( a INT, b INT, KEY( a, b) );
INSERT INTO t5 VALUES( 1, 1 ), ( 1, 2 );
EXPLAIN SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
MIN(a) MIN(b)
1 1
SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1 and b > 1;
MIN(a) MIN(b)
1 2
DROP TABLE t1, t2, t3, t4, t5;
End of 5.0 tests End of 5.0 tests
-- source include/not_embedded.inc
-- source include/have_bdb.inc
#
# Bug #16206: Superfluous COMMIT event in binlog when updating BDB in autocommit mode
#
set autocommit=1;
let $VERSION=`select version()`;
reset master;
create table bug16206 (a int);
insert into bug16206 values(1);
start transaction;
insert into bug16206 values(2);
commit;
--replace_result $VERSION VERSION
--replace_column 1 f 2 n 5 n
show binlog events;
drop table bug16206;
reset master;
create table bug16206 (a int) engine= bdb;
insert into bug16206 values(0);
insert into bug16206 values(1);
start transaction;
insert into bug16206 values(2);
commit;
insert into bug16206 values(3);
--replace_result $VERSION VERSION
--replace_column 1 f 2 n 5 n
show binlog events;
drop table bug16206;
set autocommit=0;
--echo End of 5.0 tests
...@@ -817,5 +817,38 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50; ...@@ -817,5 +817,38 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50;
DROP TABLE t1; DROP TABLE t1;
#
# Bug #27573: MIN() on an indexed column which is always NULL sets _other_
# results to NULL
#
CREATE TABLE t1 ( a INT, b INT, KEY(a) );
INSERT INTO t1 VALUES (NULL, 1), (NULL, 2);
EXPLAIN SELECT MIN(a), MIN(b) FROM t1;
SELECT MIN(a), MIN(b) FROM t1;
CREATE TABLE t2( a INT, b INT, c INT, KEY(a, b) );
INSERT INTO t2 ( a, b, c ) VALUES ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 );
EXPLAIN SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
CREATE TABLE t3 (a INT, b INT, c int, KEY(a, b));
INSERT INTO t3 VALUES (1, NULL, 1), (2, NULL, 2), (2, NULL, 2), (3, NULL, 3);
EXPLAIN SELECT MIN(a), MIN(b) FROM t3 where a = 2;
SELECT MIN(a), MIN(b) FROM t3 where a = 2;
CREATE TABLE t4 (a INT, b INT, c int, KEY(a, b));
INSERT INTO t4 VALUES (1, 1, 1), (2, NULL, 2), (2, NULL, 2), (3, 1, 3);
EXPLAIN SELECT MIN(a), MIN(b) FROM t4 where a = 2;
SELECT MIN(a), MIN(b) FROM t4 where a = 2;
SELECT MIN(b), min(c) FROM t4 where a = 2;
CREATE TABLE t5( a INT, b INT, KEY( a, b) );
INSERT INTO t5 VALUES( 1, 1 ), ( 1, 2 );
EXPLAIN SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1 and b > 1;
DROP TABLE t1, t2, t3, t4, t5;
### ###
--echo End of 5.0 tests --echo End of 5.0 tests
...@@ -207,11 +207,67 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds) ...@@ -207,11 +207,67 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
if (!ref.key_length) if (!ref.key_length)
error= table->file->index_first(table->record[0]); error= table->file->index_first(table->record[0]);
else else
error= table->file->index_read(table->record[0],key_buff, {
/*
Use index to replace MIN/MAX functions with their values
according to the following rules:
1) Insert the minimum non-null values where the WHERE clause still
matches, or
2) a NULL value if there are only NULL values for key_part_k.
3) Fail, producing a row of nulls
Implementation: Read the smallest value using the search key. If
the interval is open, read the next value after the search
key. If read fails, and we're looking for a MIN() value for a
nullable column, test if there is an exact match for the key.
*/
if (!(range_fl & NEAR_MIN))
/*
Closed interval: Either The MIN argument is non-nullable, or
we have a >= predicate for the MIN argument.
*/
error= table->file->index_read(table->record[0], ref.key_buff,
ref.key_length, ref.key_length,
range_fl & NEAR_MIN ?
HA_READ_AFTER_KEY :
HA_READ_KEY_OR_NEXT); HA_READ_KEY_OR_NEXT);
else
{
/*
Open interval: There are two cases:
1) We have only MIN() and the argument column is nullable, or
2) there is a > predicate on it, nullability is irrelevant.
We need to scan the next bigger record first.
*/
error= table->file->index_read(table->record[0], ref.key_buff,
ref.key_length, HA_READ_AFTER_KEY);
/*
If the found record is outside the group formed by the search
prefix, or there is no such record at all, check if all
records in that group have NULL in the MIN argument
column. If that is the case return that NULL.
Check if case 1 from above holds. If it does, we should read
the skipped tuple.
*/
if (ref.key_buff[prefix_len] == 1 &&
/*
Last keypart (i.e. the argument to MIN) is set to NULL by
find_key_for_maxmin only if all other keyparts are bound
to constants in a conjunction of equalities. Hence, we
can detect this by checking only if the last keypart is
NULL.
*/
(error == HA_ERR_KEY_NOT_FOUND ||
key_cmp_if_same(table, ref.key_buff, ref.key, prefix_len)))
{
DBUG_ASSERT(item_field->field->real_maybe_null());
error= table->file->index_read(table->record[0], ref.key_buff,
ref.key_length,
HA_READ_KEY_EXACT);
}
}
}
/* Verify that the read tuple indeed matches the search key */
if (!error && reckey_in_range(0, &ref, item_field->field, if (!error && reckey_in_range(0, &ref, item_field->field,
conds, range_fl, prefix_len)) conds, range_fl, prefix_len))
error= HA_ERR_KEY_NOT_FOUND; error= HA_ERR_KEY_NOT_FOUND;
...@@ -739,14 +795,24 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref, ...@@ -739,14 +795,24 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
if (!max_fl && key_part_used == key_part_to_use && part->null_bit) if (!max_fl && key_part_used == key_part_to_use && part->null_bit)
{ {
/* /*
SELECT MIN(key_part2) FROM t1 WHERE key_part1=const The query is on this form:
If key_part2 may be NULL, then we want to find the first row
that is not null SELECT MIN(key_part_k)
FROM t1
WHERE key_part_1 = const and ... and key_part_k-1 = const
If key_part_k is nullable, we want to find the first matching row
where key_part_k is not null. The key buffer is now {const, ...,
NULL}. This will be passed to the handler along with a flag
indicating open interval. If a tuple is read that does not match
these search criteria, an attempt will be made to read an exact
match for the key buffer.
*/ */
/* Set the first byte of key_part_k to 1, that means NULL */
ref->key_buff[ref->key_length]= 1; ref->key_buff[ref->key_length]= 1;
ref->key_length+= part->store_length; ref->key_length+= part->store_length;
*range_fl&= ~NO_MIN_RANGE; *range_fl&= ~NO_MIN_RANGE;
*range_fl|= NEAR_MIN; // > NULL *range_fl|= NEAR_MIN; // Open interval
} }
/* /*
The following test is false when the key in the key tree is The following test is false when the key in the key tree is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment