A fix and a test case for

Bug#41756 "Strange error messages about locks from InnoDB". In JT_EQ_REF (join_read_key()) access method, don't try to unlock rows in the handler, unless certain that a) they were locked b) they are not used. Unlocking of rows is done by the logic of the nested join loop, and is unaware of the possible caching that the access method may have. This could lead to double unlocking, when a row was unlocked first after reading into the cache, and then when taken from cache, as well as to unlocking of rows which were actually used (but taken from cache). Delegate part of the unlocking logic to the access method, and in JT_EQ_REF count how many times a record was actually used in the join. Unlock it only if it's usage count is 0. Implemented review comments. mysql-test/r/bug41756.result: Add result file (Bug#41756) mysql-test/t/bug41756-master.opt: Use --innodb-locks-unsafe-for-binlog, as in 5.0 just using read_committed isolation is not sufficient to reproduce the bug. mysql-test/t/bug41756.test: Add a test file (Bug#41756) sql/item_subselect.cc: Complete struct READ_RECORD initialization with a new member to unlock records. sql/records.cc: Extend READ_RECORD API with a method to unlock read records. sql/sql_select.cc: In JT_EQ_REF (join_read_key()) access method, don't try to unlock rows in the handler, unless certain that a) they were locked b) they are not used. sql/sql_select.h: Add members to TABLE_REF to count TABLE_REF buffer usage count. sql/structs.h: Update declarations.

A fix and a test case for
Bug#41756 "Strange error messages about locks from InnoDB". In JT_EQ_REF (join_read_key()) access method, don't try to unlock rows in the handler, unless certain that a) they were locked b) they are not used. Unlocking of rows is done by the logic of the nested join loop, and is unaware of the possible caching that the access method may have. This could lead to double unlocking, when a row was unlocked first after reading into the cache, and then when taken from cache, as well as to unlocking of rows which were actually used (but taken from cache). Delegate part of the unlocking logic to the access method, and in JT_EQ_REF count how many times a record was actually used in the join. Unlock it only if it's usage count is 0. Implemented review comments. mysql-test/r/bug41756.result: Add result file (Bug#41756) mysql-test/t/bug41756-master.opt: Use --innodb-locks-unsafe-for-binlog, as in 5.0 just using read_committed isolation is not sufficient to reproduce the bug. mysql-test/t/bug41756.test: Add a test file (Bug#41756) sql/item_subselect.cc: Complete struct READ_RECORD initialization with a new member to unlock records. sql/records.cc: Extend READ_RECORD API with a method to unlock read records. sql/sql_select.cc: In JT_EQ_REF (join_read_key()) access method, don't try to unlock rows in the handler, unless certain that a) they were locked b) they are not used. sql/sql_select.h: Add members to TABLE_REF to count TABLE_REF buffer usage count. sql/structs.h: Update declarations.
06c9d62a · Konstantin Osipov · 98198851 · 06c9d62a · 06c9d62a · 06c9d62a
Commit 06c9d62a authored Nov 03, 2009 by Konstantin Osipov
8 changed files
--- a/mysql-test/r/bug41756.result
+++ b/mysql-test/r/bug41756.result
+#
+# Bug#41756 Strange error messages about locks from InnoDB
+#
+drop table if exists t1;
+# In the default transaction isolation mode, and/or with
+# innodb_locks_unsafe_for_binlog=OFF, handler::unlock_row()
+# in InnoDB does nothing.
+# Thus in order to reproduce the condition that led to the
+# warning, one needs to relax isolation by either
+# setting a weaker tx_isolation value, or by turning on
+# the unsafe replication switch.
+# For testing purposes, choose to tweak the isolation level,
+# since it's settable at runtime, unlike
+# innodb_locks_unsafe_for_binlog, which is
+# only a command-line switch.
+#
+set @@session.tx_isolation="read-committed";
+# Prepare data. We need a table with a unique index,
+# for join_read_key to be used. The other column
+# allows to control what passes WHERE clause filter.
+create table t1 (a int primary key, b int) engine=innodb;
+# Let's make sure t1 has sufficient amount of rows
+# to exclude JT_ALL access method when reading it,
+# i.e. make sure that JT_EQ_REF(a) is always preferred.
+insert into t1 values (1,1), (2,null), (3,1), (4,1),
+(5,1), (6,1), (7,1), (8,1), (9,1), (10,1),
+(11,1), (12,1);
+#
+# Demonstrate that for the SELECT statement
+# used later in the test JT_EQ_REF access method is used.
+#
+explain
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+select 2 as a, 2 as b) as t2 for update;
+id	1
+select_type	PRIMARY
+table	<derived2>
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	
+id	1
+select_type	PRIMARY
+table	t1
+type	eq_ref
+possible_keys	PRIMARY
+key	PRIMARY
+key_len	4
+ref	t2.a
+rows	1
+Extra	Using where
+id	2
+select_type	DERIVED
+table	NULL
+type	NULL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	No tables used
+id	3
+select_type	UNION
+table	NULL
+type	NULL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	No tables used
+id	NULL
+select_type	UNION RESULT
+table	<union2,3>
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	
+#
+# Demonstrate that the reported SELECT statement
+# no longer produces warnings.
+#
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+select 2 as a, 2 as b) as t2 for update;
+1
+commit;
+# 
+# Demonstrate that due to lack of inter-sweep "reset" function,
+# we keep some non-matching records locked, even though we know
+# we could unlock them.
+# To do that, show that if there is only one distinct value
+# for a in t2 (a=2), we will keep record (2,null) in t1 locked.
+# But if we add another value for "a" to t2, say 6,
+# join_read_key cache will be pruned at least once, 
+# and thus record (2, null) in t1 will get unlocked.
+#
+begin;
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+select 2 as a, 2 as b) as t2 for update;
+1
+#
+# Switching to connection con1
+# We should be able to delete all records from t1 except (2, null),
+# since they were not locked.
+begin;
+delete from t1 where a in (1,3,4);
+delete from t1 where a in (5,6,7);
+delete from t1 where a in (8,9,10);
+delete from t1 where a in (11,12);
+# 
+# Record (2, null) is locked. This is actually unnecessary, 
+# because the previous select returned no rows. 
+# Just demonstrate the effect.
+#
+delete from t1;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+rollback;
+#
+# Switching to connection default
+#
+# Show that the original contents of t1 is intact:
+select * from t1;
+a	b
+1	1
+2	NULL
+3	1
+4	1
+5	1
+6	1
+7	1
+8	1
+9	1
+10	1
+11	1
+12	1
+commit;
+#
+# Have a one more record in t2 to show that 
+# if join_read_key cache is purned, the current
+# row under the cursor is unlocked (provided, this row didn't 
+# match the partial WHERE clause, of course).
+# Sic: the result of this test dependent on the order of retrieval
+# of records --echo # from the derived table, if !
+# We use DELETE to disable the JOIN CACHE. This DELETE modifies no
+# records. It also should leave no InnoDB row locks.
+#
+begin;
+delete t1.* from t1 natural join (select 2 as a, 2 as b union all
+select 0 as a, 0 as b) as t2;
+# Demonstrate that nothing was deleted form t1
+select * from t1;
+a	b
+1	1
+2	NULL
+3	1
+4	1
+5	1
+6	1
+7	1
+8	1
+9	1
+10	1
+11	1
+12	1
+#
+# Switching to connection con1
+begin;
+# Since there is another distinct record in the derived table
+# the previous matching record in t1 -- (2,null) -- was unlocked.
+delete from t1;
+# We will need the contents of the table again.
+rollback;
+select * from t1;
+a	b
+1	1
+2	NULL
+3	1
+4	1
+5	1
+6	1
+7	1
+8	1
+9	1
+10	1
+11	1
+12	1
+commit;
+#
+# Switching to connection default
+commit;
+begin;
+#
+# Before this patch, we could wrongly unlock a record
+# that was cached and later used in a join. Demonstrate that
+# this is no longer the case.
+# Sic: this test is also order-dependent (i.e. the
+# the bug would show up only if the first record in the union
+# is retreived and processed first.
+#
+# Verify that JT_EQ_REF is used.
+explain
+select 1 from t1 natural join (select 3 as a, 3 as b union all
+select 3 as a, 1 as b) as t2 for update;
+id	1
+select_type	PRIMARY
+table	t1
+type	ALL
+possible_keys	PRIMARY
+key	NULL
+key_len	NULL
+ref	NULL
+rows	1
+Extra	
+id	1
+select_type	PRIMARY
+table	<derived2>
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	Using where
+id	2
+select_type	DERIVED
+table	NULL
+type	NULL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	No tables used
+id	3
+select_type	UNION
+table	NULL
+type	NULL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	No tables used
+id	NULL
+select_type	UNION RESULT
+table	<union2,3>
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	NULL
+Extra	
+# Lock the record.
+select 1 from t1 natural join (select 3 as a, 3 as b union all
+select 3 as a, 2 as b) as t2 for update;
+1
+# Switching to connection con1
+#
+# We should not be able to delete record (3,1) from t1,
+# (previously it was possible).
+#
+delete from t1 where a=3;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
+# Switching to connection default
+commit;
+set @@session.tx_isolation=default;
+drop table t1;
+#
+# End of 5.0 tests
+#
--- a/mysql-test/t/bug41756-master.opt
+++ b/mysql-test/t/bug41756-master.opt
+--innodb_lock_wait_timeout=1
+--innodb-locks-unsafe-for-binlog
--- a/mysql-test/t/bug41756.test
+++ b/mysql-test/t/bug41756.test
+--source include/have_innodb.inc
+--echo #
+--echo # Bug#41756 Strange error messages about locks from InnoDB
+--echo #
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
+--echo # In the default transaction isolation mode, and/or with
+--echo # innodb_locks_unsafe_for_binlog=OFF, handler::unlock_row()
+--echo # in InnoDB does nothing.
+--echo # Thus in order to reproduce the condition that led to the
+--echo # warning, one needs to relax isolation by either
+--echo # setting a weaker tx_isolation value, or by turning on
+--echo # the unsafe replication switch.
+--echo # For testing purposes, choose to tweak the isolation level,
+--echo # since it's settable at runtime, unlike
+--echo # innodb_locks_unsafe_for_binlog, which is
+--echo # only a command-line switch.
+--echo #
+set @@session.tx_isolation="read-committed";
+
+--echo # Prepare data. We need a table with a unique index,
+--echo # for join_read_key to be used. The other column
+--echo # allows to control what passes WHERE clause filter.
+create table t1 (a int primary key, b int) engine=innodb;
+--echo # Let's make sure t1 has sufficient amount of rows
+--echo # to exclude JT_ALL access method when reading it,
+--echo # i.e. make sure that JT_EQ_REF(a) is always preferred.
+insert into t1 values (1,1), (2,null), (3,1), (4,1),
+                      (5,1), (6,1), (7,1), (8,1), (9,1), (10,1),
+                      (11,1), (12,1);
+--echo #
+--echo # Demonstrate that for the SELECT statement
+--echo # used later in the test JT_EQ_REF access method is used.
+--echo #
+--vertical_results
+explain
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+                               select 2 as a, 2 as b) as t2 for update;
+--horizontal_results
+--echo #
+--echo # Demonstrate that the reported SELECT statement
+--echo # no longer produces warnings.
+--echo #
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+                               select 2 as a, 2 as b) as t2 for update;
+commit;
+--echo # 
+--echo # Demonstrate that due to lack of inter-sweep "reset" function,
+--echo # we keep some non-matching records locked, even though we know
+--echo # we could unlock them.
+--echo # To do that, show that if there is only one distinct value
+--echo # for a in t2 (a=2), we will keep record (2,null) in t1 locked.
+--echo # But if we add another value for "a" to t2, say 6,
+--echo # join_read_key cache will be pruned at least once, 
+--echo # and thus record (2, null) in t1 will get unlocked.
+--echo #
+begin;
+select 1 from t1 natural join (select 2 as a, 1 as b union all
+                               select 2 as a, 2 as b) as t2 for update;
+connect (con1,localhost,root,,);
+--echo #
+--echo # Switching to connection con1
+connection con1;
+--echo # We should be able to delete all records from t1 except (2, null),
+--echo # since they were not locked.
+begin;
+delete from t1 where a in (1,3,4);
+delete from t1 where a in (5,6,7);
+delete from t1 where a in (8,9,10);
+delete from t1 where a in (11,12);
+--echo # 
+--echo # Record (2, null) is locked. This is actually unnecessary, 
+--echo # because the previous select returned no rows. 
+--echo # Just demonstrate the effect.
+--echo #
+--error ER_LOCK_WAIT_TIMEOUT
+delete from t1;
+rollback;
+--echo #
+--echo # Switching to connection default
+connection default;
+--echo #
+--echo # Show that the original contents of t1 is intact:
+select * from t1;
+commit;
+--echo #
+--echo # Have a one more record in t2 to show that 
+--echo # if join_read_key cache is purned, the current
+--echo # row under the cursor is unlocked (provided, this row didn't 
+--echo # match the partial WHERE clause, of course).
+--echo # Sic: the result of this test dependent on the order of retrieval
+--echo # of records --echo # from the derived table, if !
+--echo # We use DELETE to disable the JOIN CACHE. This DELETE modifies no
+--echo # records. It also should leave no InnoDB row locks.
+--echo #
+begin;
+delete t1.* from t1 natural join (select 2 as a, 2 as b union all
+                                  select 0 as a, 0 as b) as t2;
+--echo # Demonstrate that nothing was deleted form t1
+select * from t1;
+--echo #
+--echo # Switching to connection con1
+connection con1;
+begin;
+--echo # Since there is another distinct record in the derived table
+--echo # the previous matching record in t1 -- (2,null) -- was unlocked.
+delete from t1;
+--echo # We will need the contents of the table again.
+rollback;
+select * from t1;
+commit;
+--echo #
+--echo # Switching to connection default
+connection default;
+commit;
+begin;
+--echo #
+--echo # Before this patch, we could wrongly unlock a record
+--echo # that was cached and later used in a join. Demonstrate that
+--echo # this is no longer the case.
+--echo # Sic: this test is also order-dependent (i.e. the
+--echo # the bug would show up only if the first record in the union
+--echo # is retreived and processed first.
+--echo #
+--echo # Verify that JT_EQ_REF is used.
+--vertical_results
+explain
+select 1 from t1 natural join (select 3 as a, 3 as b union all
+                               select 3 as a, 1 as b) as t2 for update;
+--horizontal_results
+--echo # Lock the record.
+select 1 from t1 natural join (select 3 as a, 3 as b union all
+                               select 3 as a, 2 as b) as t2 for update;
+--echo # Switching to connection con1
+connection con1;
+--echo #
+--echo # We should not be able to delete record (3,1) from t1,
+--echo # (previously it was possible).
+--echo #
+--error ER_LOCK_WAIT_TIMEOUT
+delete from t1 where a=3;
+--echo # Switching to connection default
+connection default;
+commit;
+
+disconnect con1;
+set @@session.tx_isolation=default;
+drop table t1;
+
+--echo #
+--echo # End of 5.0 tests
+--echo #
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -1869,6 +1869,7 @@ int subselect_single_select_engine::exec()
              tab->read_record.record= tab->table->record[0];
              tab->read_record.thd= join->thd;
              tab->read_record.ref_length= tab->table->file->ref_length;
+              tab->read_record.unlock_row= rr_unlock_row;
              *(last_changed_tab++)= tab;
              break;
            }

--- a/sql/records.cc
+++ b/sql/records.cc
@@ -61,6 +61,7 @@ void init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
  info->file=  table->file;
  info->record= table->record[0];
  info->print_error= print_error;
+  info->unlock_row= rr_unlock_row;

  table->status=0;			/* And it's always found */
  if (!table->file->inited)
@@ -135,6 +136,7 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
  }
  info->select=select;
  info->print_error=print_error;
+  info->unlock_row= rr_unlock_row;
  info->ignore_not_found_rows= 0;
  table->status=0;			/* And it's always found */


--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -140,6 +140,7 @@ static int join_read_const_table(JOIN_TAB *tab, POSITION *pos);
 static int join_read_system(JOIN_TAB *tab);
 static int join_read_const(JOIN_TAB *tab);
 static int join_read_key(JOIN_TAB *tab);
+static void join_read_key_unlock_row(st_join_table *tab);
 static int join_read_always_key(JOIN_TAB *tab);
 static int join_read_last_key(JOIN_TAB *tab);
 static int join_no_more_records(READ_RECORD *info);
@@ -5376,7 +5377,9 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
  }
  j->ref.key_buff2=j->ref.key_buff+ALIGN_SIZE(length);
  j->ref.key_err=1;
+  j->ref.has_record= FALSE;
  j->ref.null_rejecting= 0;
+  j->ref.use_count= 0;
  keyuse=org_keyuse;

  store_key **ref_key= j->ref.key_copy;
@@ -6176,6 +6179,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
  DBUG_RETURN(0);
 }

+
+/**
+  The default implementation of unlock-row method of READ_RECORD,
+  used in all access methods.
+*/
+
+void rr_unlock_row(st_join_table *tab)
+{
+  READ_RECORD *info= &tab->read_record;
+  info->file->unlock_row();
+}
+
+
+
 static void
 make_join_readinfo(JOIN *join, ulonglong options)
 {
@@ -6191,6 +6208,7 @@ make_join_readinfo(JOIN *join, ulonglong options)
    TABLE *table=tab->table;
    tab->read_record.table= table;
    tab->read_record.file=table->file;
+    tab->read_record.unlock_row= rr_unlock_row;
    tab->next_select=sub_select;		/* normal select */

    /*
@@ -6234,6 +6252,7 @@ make_join_readinfo(JOIN *join, ulonglong options)
      delete tab->quick;
      tab->quick=0;
      tab->read_first_record= join_read_key;
+      tab->read_record.unlock_row= join_read_key_unlock_row;
      tab->read_record.read_record= join_no_more_records;
      if (table->used_keys.is_set(tab->ref.key) &&
 	  !table->no_keyread)
@@ -10936,7 +10955,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
        return NESTED_LOOP_NO_MORE_ROWS;
    }
    else
-      join_tab->read_record.file->unlock_row();
+      join_tab->read_record.unlock_row(join_tab);
  }
  else
  {
@@ -10946,7 +10965,7 @@ evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
    */
    join->examined_rows++;
    join->thd->row_count++;
-    join_tab->read_record.file->unlock_row();
+    join_tab->read_record.unlock_row(join_tab);
  }
  return NESTED_LOOP_OK;
 }
@@ -11299,17 +11318,55 @@ join_read_key(JOIN_TAB *tab)
      table->status=STATUS_NOT_FOUND;
      return -1;
    }
+    /*
+      Moving away from the current record. Unlock the row
+      in the handler if it did not match the partial WHERE.
+    */
+    if (tab->ref.has_record && tab->ref.use_count == 0)
+    {
+      tab->read_record.file->unlock_row();
+      tab->ref.has_record= FALSE;
+    }
+
    error=table->file->index_read(table->record[0],
 				  tab->ref.key_buff,
 				  tab->ref.key_length,HA_READ_KEY_EXACT);
    if (error && error != HA_ERR_KEY_NOT_FOUND)
      return report_error(table, error);
+
+    if (! error)
+    {
+      tab->ref.has_record= TRUE;
+      tab->ref.use_count= 1;
+    }
+  }
+  else if (table->status == 0)
+  {
+    DBUG_ASSERT(tab->ref.has_record);
+    tab->ref.use_count++;
  }
  table->null_row=0;
  return table->status ? -1 : 0;
 }


+/**
+  Since join_read_key may buffer a record, do not unlock
+  it if it was not used in this invocation of join_read_key().
+  Only count locks, thus remembering if the record was left unused,
+  and unlock already when pruning the current value of
+  TABLE_REF buffer.
+  @sa join_read_key()
+*/
+
+static void
+join_read_key_unlock_row(st_join_table *tab)
+{
+  DBUG_ASSERT(tab->ref.use_count);
+  if (tab->ref.use_count)
+    tab->ref.use_count--;
+}
+
 /*
  ref access method implementation: "read_first" function


--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -53,6 +53,8 @@ class store_key;
 typedef struct st_table_ref
 {
  bool		key_err;
+  /** True if something was read into buffer in join_read_key.  */
+  bool          has_record;
  uint          key_parts;                // num of ...
  uint          key_length;               // length of key_buff
  int           key;                      // key no
@@ -79,7 +81,11 @@ typedef struct st_table_ref
  key_part_map  null_rejecting;
  table_map	depend_map;		  // Table depends on these tables.
  byte          *null_ref_key;		  // null byte position in the key_buf.
-  					  // used for REF_OR_NULL optimization.
+  /*
+    The number of times the record associated with this key was used
+    in the join.
+  */
+  ha_rows       use_count;
 } TABLE_REF;

 /*

--- a/sql/structs.h
+++ b/sql/structs.h
@@ -117,16 +117,22 @@ typedef struct st_reginfo {		/* Extra info about reg */
 } REGINFO;


-struct st_read_record;				/* For referense later */
 class SQL_SELECT;
 class THD;
 class handler;
+struct st_join_table;
+
+void rr_unlock_row(st_join_table *tab);

-typedef struct st_read_record {			/* Parameter to read_record */
+struct READ_RECORD {			/* Parameter to read_record */
+  typedef int (*Read_func)(READ_RECORD*);
+  typedef void (*Unlock_row_func)(st_join_table *);
  struct st_table *table;			/* Head-form */
  handler *file;
  struct st_table **forms;			/* head and ref forms */
-  int (*read_record)(struct st_read_record *);
+
+  Read_func read_record;
+  Unlock_row_func unlock_row;
  THD *thd;
  SQL_SELECT *select;
  uint cache_records;
@@ -138,7 +144,7 @@ typedef struct st_read_record {			/* Parameter to read_record */
  byte	*cache,*cache_pos,*cache_end,*read_positions;
  IO_CACHE *io_cache;
  bool print_error, ignore_not_found_rows;
-} READ_RECORD;
+};


 /*