Bug#27573: MIN() on an indexed column which is always NULL sets _other_ results to NULL

For queries of the form SELECT MIN(key_part_k) FROM t1 WHERE key_part_1 = const and ... and key_part_k-1 = const, the opt_sum_query optimization tries to use an index to substitute MIN/MAX functions with their values according to the following rules: 1) Insert the minimum non-null values where the WHERE clause still matches, or 3) A row of nulls. However, the correct semantics requires that there is a third case 2) such that a NULL value is substituted if there are only NULL values for key_part_k. The patch modifies opt_sum_query() to handle this missing case.

Bug#27573: MIN() on an indexed column which is always NULL sets _other_ results to NULL
For queries of the form SELECT MIN(key_part_k) FROM t1 WHERE key_part_1 = const and ... and key_part_k-1 = const, the opt_sum_query optimization tries to use an index to substitute MIN/MAX functions with their values according to the following rules: 1) Insert the minimum non-null values where the WHERE clause still matches, or 3) A row of nulls. However, the correct semantics requires that there is a third case 2) such that a NULL value is substituted if there are only NULL values for key_part_k. The patch modifies opt_sum_query() to handle this missing case.
5bc137ff · mhansson/martin@linux-st28.site · 54bc9c3d · 5bc137ff · 5bc137ff · 5bc137ff
Commit 5bc137ff authored May 15, 2007 by mhansson/martin@linux-st28.site
5 changed files
--- a/mysql-test/r/bdb_notembedded.result
+++ b/mysql-test/r/bdb_notembedded.result
+set autocommit=1;
+reset master;
+create table bug16206 (a int);
+insert into bug16206 values(1);
+start transaction;
+insert into bug16206 values(2);
+commit;
+show binlog events;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+f	n	Format_desc	1	n	Server ver: VERSION, Binlog ver: 4
+f	n	Query	1	n	use `test`; create table bug16206 (a int)
+f	n	Query	1	n	use `test`; insert into bug16206 values(1)
+f	n	Query	1	n	use `test`; insert into bug16206 values(2)
+drop table bug16206;
+reset master;
+create table bug16206 (a int) engine=         bdb;
+insert into bug16206 values(0);
+insert into bug16206 values(1);
+start transaction;
+insert into bug16206 values(2);
+commit;
+insert into bug16206 values(3);
+show binlog events;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+f	n	Format_desc	1	n	Server ver: VERSION, Binlog ver: 4
+f	n	Query	1	n	use `test`; create table bug16206 (a int) engine=         bdb
+f	n	Query	1	n	use `test`; insert into bug16206 values(0)
+f	n	Query	1	n	use `test`; insert into bug16206 values(1)
+f	n	Query	1	n	use `test`; BEGIN
+f	n	Query	1	n	use `test`; insert into bug16206 values(2)
+f	n	Query	1	n	use `test`; COMMIT
+f	n	Query	1	n	use `test`; insert into bug16206 values(3)
+drop table bug16206;
+set autocommit=0;
+End of 5.0 tests
--- a/mysql-test/r/func_group.result
+++ b/mysql-test/r/func_group.result
@@ -1321,4 +1321,51 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50;
 a	average
 1	32768.5000
 DROP TABLE t1;
+CREATE TABLE t1 ( a INT, b INT, KEY(a) );
+INSERT INTO t1 VALUES (NULL, 1), (NULL, 2);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	
+SELECT MIN(a), MIN(b) FROM t1;
+MIN(a)	MIN(b)
+NULL	1
+CREATE TABLE t2( a INT, b INT, c INT, KEY(a, b) );
+INSERT INTO t2 ( a, b, c ) VALUES ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 );
+EXPLAIN SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ref	a	a	5	const	2	Using where
+SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
+MIN(b)	MIN(c)
+3	2
+CREATE TABLE t3 (a INT, b INT, c int, KEY(a, b));
+INSERT INTO t3 VALUES (1, NULL, 1), (2, NULL, 2),  (2, NULL, 2),  (3, NULL, 3);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t3 where a = 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Select tables optimized away
+SELECT MIN(a), MIN(b) FROM t3 where a = 2;
+MIN(a)	MIN(b)
+2	NULL
+CREATE TABLE t4 (a INT, b INT, c int, KEY(a, b));
+INSERT INTO t4 VALUES (1, 1, 1), (2, NULL, 2),  (2, NULL, 2),  (3, 1, 3);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t4 where a = 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Select tables optimized away
+SELECT MIN(a), MIN(b) FROM t4 where a = 2;
+MIN(a)	MIN(b)
+2	NULL
+SELECT MIN(b), min(c) FROM t4 where a = 2;
+MIN(b)	min(c)
+NULL	2
+CREATE TABLE t5( a INT, b INT, KEY( a, b) );
+INSERT INTO t5 VALUES( 1, 1 ), ( 1, 2 );
+EXPLAIN SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Select tables optimized away
+SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
+MIN(a)	MIN(b)
+1	1
+SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1 and b > 1;
+MIN(a)	MIN(b)
+1	2
+DROP TABLE t1, t2, t3, t4, t5;
 End of 5.0 tests
--- a/mysql-test/t/bdb_notembedded.test
+++ b/mysql-test/t/bdb_notembedded.test
+-- source include/not_embedded.inc
+-- source include/have_bdb.inc
+#
+# Bug #16206: Superfluous COMMIT event in binlog when updating BDB in autocommit mode
+#
+set autocommit=1;
+let $VERSION=`select version()`;
+reset master;
+create table bug16206 (a int);
+insert into bug16206 values(1);
+start transaction;
+insert into bug16206 values(2);
+commit;
+--replace_result $VERSION VERSION
+--replace_column 1 f 2 n 5 n
+show binlog events;
+drop table bug16206;
+reset master;
+create table bug16206 (a int) engine=         bdb;
+insert into bug16206 values(0);
+insert into bug16206 values(1);
+start transaction;
+insert into bug16206 values(2);
+commit;
+insert into bug16206 values(3);
+--replace_result $VERSION VERSION
+--replace_column 1 f 2 n 5 n
+show binlog events;
+drop table bug16206;
+set autocommit=0;
+--echo End of 5.0 tests
--- a/mysql-test/t/func_group.test
+++ b/mysql-test/t/func_group.test
@@ -817,5 +817,38 @@ SELECT a,AVG(DISTINCT b) AS average FROM t1 GROUP BY a HAVING average > 50;
 DROP TABLE t1;
+#
+# Bug #27573: MIN() on an indexed column which is always NULL sets _other_ 
+# results to NULL
+#
+CREATE TABLE t1 ( a INT, b INT, KEY(a) );
+INSERT INTO t1 VALUES (NULL, 1), (NULL, 2);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t1;
+SELECT MIN(a), MIN(b) FROM t1;
+CREATE TABLE t2( a INT, b INT, c INT, KEY(a, b) );
+INSERT INTO t2 ( a, b, c ) VALUES ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 );
+EXPLAIN SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
+SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1;
+CREATE TABLE t3 (a INT, b INT, c int, KEY(a, b));
+INSERT INTO t3 VALUES (1, NULL, 1), (2, NULL, 2),  (2, NULL, 2),  (3, NULL, 3);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t3 where a = 2;
+SELECT MIN(a), MIN(b) FROM t3 where a = 2;
+CREATE TABLE t4 (a INT, b INT, c int, KEY(a, b));
+INSERT INTO t4 VALUES (1, 1, 1), (2, NULL, 2),  (2, NULL, 2),  (3, 1, 3);
+EXPLAIN SELECT MIN(a), MIN(b) FROM t4 where a = 2;
+SELECT MIN(a), MIN(b) FROM t4 where a = 2;
+SELECT MIN(b), min(c) FROM t4 where a = 2;
+CREATE TABLE t5( a INT, b INT, KEY( a, b) ); 
+INSERT INTO t5 VALUES( 1, 1 ), ( 1, 2 );
+EXPLAIN SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
+SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1;
+SELECT MIN(a), MIN(b) FROM t5 WHERE a = 1 and b > 1;
+DROP TABLE t1, t2, t3, t4, t5;
 ###
 --echo End of 5.0 tests
--- a/sql/opt_sum.cc
+++ b/sql/opt_sum.cc
@@ -207,11 +207,67 @@ int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
          if (!ref.key_length)
            error= table->file->index_first(table->record[0]);
          else 
-	    error= table->file->index_read(table->record[0],key_buff,
+          {
+            /*
+              Use index to replace MIN/MAX functions with their values
+              according to the following rules:
+              1) Insert the minimum non-null values where the WHERE clause still
+                 matches, or
+              2) a NULL value if there are only NULL values for key_part_k.
+              3) Fail, producing a row of nulls
+              Implementation: Read the smallest value using the search key. If
+              the interval is open, read the next value after the search
+              key. If read fails, and we're looking for a MIN() value for a
+              nullable column, test if there is an exact match for the key.
+            */
+            if (!(range_fl & NEAR_MIN))
+              /* 
+                 Closed interval: Either the MIN argument is non-nullable, or
+                 we have a >= predicate for the MIN argument.
+              */
+              error= table->file->index_read(table->record[0], ref.key_buff,
                                             ref.key_length, 
-					   range_fl & NEAR_MIN ?
-					   HA_READ_AFTER_KEY :
                                             HA_READ_KEY_OR_NEXT);
+            else
+            {
+              /*
+                Open interval: There are two cases:
+                1) We have only MIN() and the argument column is nullable, or
+                2) there is a > predicate on it, nullability is irrelevant.
+                We need to scan the next bigger record first.
+              */
+              error= table->file->index_read(table->record[0], ref.key_buff, 
+                                             ref.key_length, HA_READ_AFTER_KEY);
+              /* 
+                 If the found record is outside the group formed by the search
+                 prefix, or there is no such record at all, check if all
+                 records in that group have NULL in the MIN argument
+                 column. If that is the case return that NULL.
+                 Check if case 1 from above holds. If it does, we should read
+                 the skipped tuple.
+              */
+              if (ref.key_buff[prefix_len] == 1 && 
+                  /* 
+                     Last keypart (i.e. the argument to MIN) is set to NULL by
+                     find_key_for_maxmin only if all other keyparts are bound
+                     to constants in a conjunction of equalities. Hence, we
+                     can detect this by checking only if the last keypart is
+                     NULL.
+                  */                     
+                  (error == HA_ERR_KEY_NOT_FOUND ||
+                   key_cmp_if_same(table, ref.key_buff, ref.key, prefix_len)))
+              {
+                DBUG_ASSERT(item_field->field->real_maybe_null());
+                error= table->file->index_read(table->record[0], ref.key_buff,
+                                               ref.key_length, 
+                                               HA_READ_KEY_EXACT);
+              }
+            }
+          }
+          /* Verify that the read tuple indeed matches the search key */
 	  if (!error && reckey_in_range(0, &ref, item_field->field, 
 			                conds, range_fl, prefix_len))
 	    error= HA_ERR_KEY_NOT_FOUND;
@@ -739,14 +795,24 @@ static bool find_key_for_maxmin(bool max_fl, TABLE_REF *ref,
          if (!max_fl && key_part_used == key_part_to_use && part->null_bit)
          {
            /*
-              SELECT MIN(key_part2) FROM t1 WHERE key_part1=const
+              The query is of this form:
-              If key_part2 may be NULL, then we want to find the first row
-              that is not null
+              SELECT MIN(key_part_k) 
+              FROM t1 
+              WHERE key_part_1 = const and ... and key_part_k-1 = const
+              If key_part_k is nullable, we want to find the first matching row
+              where key_part_k is not null. The key buffer is now {const, ...,
+              NULL}. This will be passed to the handler along with a flag
+              indicating open interval. If a tuple is read that does not match
+              these search criteria, an attempt will be made to read an exact
+              match for the key buffer.
            */
+            /* Set the first byte of key_part_k to 1, that means NULL */
            ref->key_buff[ref->key_length]= 1;
            ref->key_length+= part->store_length;
            *range_fl&= ~NO_MIN_RANGE;
-            *range_fl|= NEAR_MIN;                // > NULL
+            *range_fl|= NEAR_MIN; // Open interval
          }
          /*
            The following test is false when the key in the key tree is