Commit dac6ffb9 authored by unknown's avatar unknown

BUG#36197: flush tables (or little table cache) can cause crash on slave

When flushing tables, there was a slight chance that the flush occurred
between the processing of two table map events. Since the tables were opened
one by one, this could leave the tables invalid, so that subsequent
locking of the tables would cause the slave to crash.

The problem is solved by opening and locking all tables at once using
simple_open_n_lock_tables(). Also, the patch contains a change to open_tables()
so that pre-locking only takes place when the trg_event_map is not zero, which
was not the case before (this caused the lock to be placed in thd->locked_tables
instead of thd->lock since the assumption was that triggers would be called
later and therefore the tables should be pre-locked).


mysql-test/suite/rpl/r/rpl_found_rows.result:
  Result change
mysql-test/suite/rpl/r/rpl_row_inexist_tbl.result:
  Result change
mysql-test/suite/rpl/t/rpl_found_rows.test:
  Adding drop of table that was created in test.
mysql-test/suite/rpl/t/rpl_slave_status.test:
  Adding waits for slave start and stop to ensure that test works.
sql/log_event.cc:
  Replacing table-by-table open and lock with a single call
  to simple_open_n_lock_tables(), which in turn required some
  changes to other code.
sql/log_event_old.cc:
  Replacing table-by-table open and lock with a single call
  to simple_open_n_lock_tables(), which in turn required some
  changes to other code.
sql/sql_base.cc:
  Extending check inside open_tables() so that pre-locking is only done if
  tables->trg_event_map is non-zero.
mysql-test/include/wait_for_slave_sql_to_start.inc:
  New BitKeeper file ``mysql-test/include/wait_for_slave_sql_to_start.inc''
parent 02c00f95
###################################################
#Author: Mats (based on file written by Jeb)
#Date: 2008-05-06
#Purpose: To wait for slave SQL thread to start
#Details:
# 1) Fill in and setup variables
# 2) loop, polling SHOW SLAVE STATUS until
# the slave SQL thread has started
# 3) If loops too long die.
####################################################
# Poll SHOW SLAVE STATUS on the slave connection until the
# Slave_SQL_Running column reads 'Yes'.
#
# $row_number - row of the SHOW SLAVE STATUS result to inspect
# $run        - loop flag; cleared once the SQL thread is reported running
# $counter    - remaining number of retries (0.1 second sleep between polls)
connection slave;
let $row_number= 1;
let $run= 1;
let $counter= 300;
while ($run)
{
  let $sql_result= query_get_value("SHOW SLAVE STATUS", Slave_SQL_Running, $row_number);
  if (`SELECT '$sql_result' = 'Yes'`){
    let $run= 0;
  }
  # Only apply the timeout and the delay while the SQL thread is still not
  # running. Otherwise a success detected on the very last poll would be
  # reported as a failure, and every successful wait would sleep once more
  # than necessary.
  if ($run){
    if (!$counter){
      --echo "Failed while waiting for slave SQL to start"
      query_vertical SHOW SLAVE STATUS;
      exit;
    }
    sleep 0.1;
    dec $counter;
  }
}
...@@ -226,7 +226,7 @@ sect test count ...@@ -226,7 +226,7 @@ sect test count
2 6 0 2 6 0
2 6 183 2 6 183
2 7 0 2 7 0
DROP TABLE t1, logtbl; DROP TABLE t1, t2, logtbl;
DROP PROCEDURE just_log; DROP PROCEDURE just_log;
DROP PROCEDURE log_me; DROP PROCEDURE log_me;
DROP PROCEDURE log_me_inner; DROP PROCEDURE log_me_inner;
......
...@@ -37,7 +37,7 @@ Replicate_Ignore_Table # ...@@ -37,7 +37,7 @@ Replicate_Ignore_Table #
Replicate_Wild_Do_Table Replicate_Wild_Do_Table
Replicate_Wild_Ignore_Table Replicate_Wild_Ignore_Table
Last_Errno 1146 Last_Errno 1146
Last_Error Error 'Table 'test.t1' doesn't exist' on opening table `test`.`t1` Last_Error Error 'Table 'test.t1' doesn't exist' on opening tables
Skip_Counter 0 Skip_Counter 0
Exec_Master_Log_Pos 941 Exec_Master_Log_Pos 941
Relay_Log_Space # Relay_Log_Space #
...@@ -55,5 +55,5 @@ Master_SSL_Verify_Server_Cert No ...@@ -55,5 +55,5 @@ Master_SSL_Verify_Server_Cert No
Last_IO_Errno # Last_IO_Errno #
Last_IO_Error # Last_IO_Error #
Last_SQL_Errno 1146 Last_SQL_Errno 1146
Last_SQL_Error Error 'Table 'test.t1' doesn't exist' on opening table `test`.`t1` Last_SQL_Error Error 'Table 'test.t1' doesn't exist' on opening tables
drop table t1, t2; drop table t1, t2;
...@@ -247,7 +247,7 @@ sync_slave_with_master; ...@@ -247,7 +247,7 @@ sync_slave_with_master;
SELECT * FROM logtbl WHERE sect = 2 ORDER BY sect,test; SELECT * FROM logtbl WHERE sect = 2 ORDER BY sect,test;
connection master; connection master;
DROP TABLE t1, logtbl; DROP TABLE t1, t2, logtbl;
DROP PROCEDURE just_log; DROP PROCEDURE just_log;
DROP PROCEDURE log_me; DROP PROCEDURE log_me;
DROP PROCEDURE log_me_inner; DROP PROCEDURE log_me_inner;
......
...@@ -36,15 +36,16 @@ connection slave; ...@@ -36,15 +36,16 @@ connection slave;
# 4. Restart slave without privileges # 4. Restart slave without privileges
# (slave.err will contain access denied error for this START SLAVE command) # (slave.err will contain access denied error for this START SLAVE command)
stop slave; stop slave;
source include/wait_for_slave_to_stop.inc;
start slave; start slave;
source include/wait_for_slave_sql_to_start.inc;
# 5. Make sure Slave_IO_Running = No # 5. Make sure Slave_IO_Running = No
--replace_result $MASTER_MYPORT MASTER_MYPORT --replace_result $MASTER_MYPORT MASTER_MYPORT
# Column 1 is replaced, since the output can be either # Column 1 is replaced, since the output can be either
# "Connecting to master" or "Waiting for master update" # "Connecting to master" or "Waiting for master update"
--replace_column 1 # 7 # 8 # 9 # 22 # 23 # 35 # 36 # --replace_column 1 # 7 # 8 # 9 # 22 # 23 # 35 # 36 #
--vertical_results query_vertical show slave status;
show slave status;
# Cleanup (Note that slave IO thread is not running) # Cleanup (Note that slave IO thread is not running)
connection slave; connection slave;
......
...@@ -6425,15 +6425,29 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -6425,15 +6425,29 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
*/ */
if (!thd->lock) if (!thd->lock)
{ {
bool need_reopen= 1; /* To execute the first lap of the loop below */
/* /*
lock_tables() reads the contents of thd->lex, so they must be Lock_tables() reads the contents of thd->lex, so they must be
initialized. Contrary to in initialized.
Table_map_log_event::do_apply_event() we don't call
mysql_init_query() as that may reset the binlog format. We also call the mysql_reset_thd_for_next_command(), since this
is the logical start of the next "statement". Note that this
call might reset the value of current_stmt_binlog_row_based, so
we need to do any changes to that value after this function.
*/ */
lex_start(thd); lex_start(thd);
mysql_reset_thd_for_next_command(thd);
/*
Check if the slave is set to use SBR. If so, it should switch
to using RBR until the end of the "statement", i.e., next
STMT_END_F or next error.
*/
if (!thd->current_stmt_binlog_row_based &&
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
{
thd->set_current_stmt_binlog_row_based();
}
/* /*
There are a few flags that are replicated with each row event. There are a few flags that are replicated with each row event.
...@@ -6452,72 +6466,23 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -6452,72 +6466,23 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
/* A small test to verify that objects have consistent types */ /* A small test to verify that objects have consistent types */
DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
if (simple_open_n_lock_tables(thd, rli->tables_to_lock))
while ((error= lock_tables(thd, rli->tables_to_lock,
rli->tables_to_lock_count, &need_reopen)))
{ {
if (!need_reopen) uint actual_error= thd->main_da.sql_errno();
{ if (thd->is_slave_error || thd->is_fatal_error)
if (thd->is_slave_error || thd->is_fatal_error)
{
/*
Error reporting borrowed from Query_log_event with many excessive
simplifications (we don't honour --slave-skip-errors)
*/
uint actual_error= thd->main_da.sql_errno();
rli->report(ERROR_LEVEL, actual_error,
"Error '%s' in %s event: when locking tables",
(actual_error ? thd->main_da.message():
"unexpected success or fatal error"),
get_type_str());
thd->is_fatal_error= 1;
}
else
{
rli->report(ERROR_LEVEL, error,
"Error in %s event: when locking tables",
get_type_str());
}
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(error);
}
/*
So we need to reopen the tables.
We need to flush the pending RBR event, since it keeps a
pointer to an open table.
ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
the pending RBR event and reset the table pointer after the
tables has been reopened.
NOTE: For this new scheme there should be no pending event:
need to add code to assert that is the case.
*/
thd->binlog_flush_pending_rows_event(false);
TABLE_LIST *tables= rli->tables_to_lock;
close_tables_for_reopen(thd, &tables);
uint tables_count= rli->tables_to_lock_count;
if ((error= open_tables(thd, &tables, &tables_count, 0)))
{ {
if (thd->is_slave_error || thd->is_fatal_error) /*
{ Error reporting borrowed from Query_log_event with many excessive
/* simplifications (we don't honour --slave-skip-errors)
Error reporting borrowed from Query_log_event with many excessive */
simplifications (we don't honour --slave-skip-errors) rli->report(ERROR_LEVEL, actual_error,
*/ "Error '%s' on opening tables",
uint actual_error= thd->main_da.sql_errno(); (actual_error ? thd->main_da.message() :
rli->report(ERROR_LEVEL, actual_error, "unexpected success or fatal error"));
"Error '%s' on reopening tables", thd->is_slave_error= 1;
(actual_error ? thd->main_da.message() :
"unexpected success or fatal error"));
thd->is_slave_error= 1;
}
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(error);
} }
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(actual_error);
} }
/* /*
...@@ -6570,6 +6535,8 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -6570,6 +6535,8 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
table= table=
m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id); m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id));
if (table) if (table)
{ {
/* /*
...@@ -7293,71 +7260,7 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -7293,71 +7260,7 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
} }
else else
{ {
/*
open_tables() reads the contents of thd->lex, so they must be
initialized, so we should call lex_start(); to be even safer, we
call mysql_init_query() which does a more complete set of inits.
*/
lex_start(thd);
mysql_reset_thd_for_next_command(thd);
/*
Check if the slave is set to use SBR. If so, it should switch
to using RBR until the end of the "statement", i.e., next
STMT_END_F or next error.
*/
if (!thd->current_stmt_binlog_row_based &&
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
{
thd->set_current_stmt_binlog_row_based();
}
/*
Open the table if it is not already open and add the table to
table map. Note that for any table that should not be
replicated, a filter is needed.
The creation of a new TABLE_LIST is used to up-cast the
table_list consisting of RPL_TABLE_LIST items. This will work
since the only case where the argument to open_tables() is
changed, is when thd->lex->query_tables == table_list, i.e.,
when the statement requires prelocking. Since this is not
executed when a statement is executed, this case will not occur.
As a precaution, an assertion is added to ensure that the bad
case is not a fact.
Either way, the memory in the list is *never* released
internally in the open_tables() function, hence we take a copy
of the pointer to make sure that it's not lost.
*/
uint count;
DBUG_ASSERT(thd->lex->query_tables != table_list); DBUG_ASSERT(thd->lex->query_tables != table_list);
TABLE_LIST *tmp_table_list= table_list;
if ((error= open_tables(thd, &tmp_table_list, &count, 0)))
{
if (thd->is_slave_error || thd->is_fatal_error)
{
/*
Error reporting borrowed from Query_log_event with many excessive
simplifications (we don't honour --slave-skip-errors)
*/
uint actual_error= thd->main_da.sql_errno();
rli->report(ERROR_LEVEL, actual_error,
"Error '%s' on opening table `%s`.`%s`",
(actual_error ? thd->main_da.message() :
"unexpected success or fatal error"),
table_list->db, table_list->table_name);
thd->is_slave_error= 1;
}
goto err;
}
m_table= table_list->table;
/*
This will fail later otherwise, the 'in_use' field should be
set to the current thread.
*/
DBUG_ASSERT(m_table->in_use);
/* /*
Use placement new to construct the table_def instance in the Use placement new to construct the table_def instance in the
...@@ -7383,10 +7286,6 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -7383,10 +7286,6 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
} }
DBUG_RETURN(error); DBUG_RETURN(error);
err:
my_free(memory, MYF(MY_WME));
DBUG_RETURN(error);
} }
Log_event::enum_skip_reason Log_event::enum_skip_reason
......
...@@ -53,81 +53,46 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info ...@@ -53,81 +53,46 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
*/ */
if (!thd->lock) if (!thd->lock)
{ {
bool need_reopen= 1; /* To execute the first lap of the loop below */
/* /*
lock_tables() reads the contents of thd->lex, so they must be Lock_tables() reads the contents of thd->lex, so they must be
initialized. Contrary to in initialized.
Table_map_log_event::do_apply_event() we don't call
mysql_init_query() as that may reset the binlog format. We also call the mysql_reset_thd_for_next_command(), since this
is the logical start of the next "statement". Note that this
call might reset the value of current_stmt_binlog_row_based, so
we need to do any changes to that value after this function.
*/ */
lex_start(thd); lex_start(thd);
mysql_reset_thd_for_next_command(thd);
while ((error= lock_tables(thd, rli->tables_to_lock, /*
rli->tables_to_lock_count, &need_reopen))) Check if the slave is set to use SBR. If so, it should switch
to using RBR until the end of the "statement", i.e., next
STMT_END_F or next error.
*/
if (!thd->current_stmt_binlog_row_based &&
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
{ {
if (!need_reopen) thd->set_current_stmt_binlog_row_based();
{ }
if (thd->is_slave_error || thd->is_fatal_error)
{
/*
Error reporting borrowed from Query_log_event with many excessive
simplifications (we don't honour --slave-skip-errors)
*/
uint actual_error= thd->main_da.sql_errno();
rli->report(ERROR_LEVEL, actual_error,
"Error '%s' in %s event: when locking tables",
(actual_error ? thd->main_da.message() :
"unexpected success or fatal error"),
ev->get_type_str());
thd->is_fatal_error= 1;
}
else
{
rli->report(ERROR_LEVEL, error,
"Error in %s event: when locking tables",
ev->get_type_str());
}
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(error);
}
/*
So we need to reopen the tables.
We need to flush the pending RBR event, since it keeps a
pointer to an open table.
ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
the pending RBR event and reset the table pointer after the
tables has been reopened.
NOTE: For this new scheme there should be no pending event:
need to add code to assert that is the case.
*/
thd->binlog_flush_pending_rows_event(false);
TABLE_LIST *tables= rli->tables_to_lock;
close_tables_for_reopen(thd, &tables);
uint tables_count= rli->tables_to_lock_count; if (simple_open_n_lock_tables(thd, rli->tables_to_lock))
if ((error= open_tables(thd, &tables, &tables_count, 0))) {
uint actual_error= thd->main_da.sql_errno();
if (thd->is_slave_error || thd->is_fatal_error)
{ {
if (thd->is_slave_error || thd->is_fatal_error) /*
{ Error reporting borrowed from Query_log_event with many excessive
/* simplifications (we don't honour --slave-skip-errors)
Error reporting borrowed from Query_log_event with many excessive */
simplifications (we don't honour --slave-skip-errors) rli->report(ERROR_LEVEL, actual_error,
*/ "Error '%s' on opening tables",
uint actual_error= thd->main_da.sql_errno(); (actual_error ? thd->main_da.message() :
rli->report(ERROR_LEVEL, actual_error, "unexpected success or fatal error"));
"Error '%s' on reopening tables", thd->is_slave_error= 1;
(actual_error ? thd->main_da.message() :
"unexpected success or fatal error"));
thd->is_slave_error= 1;
}
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(error);
} }
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
DBUG_RETURN(actual_error);
} }
/* /*
......
...@@ -4367,6 +4367,11 @@ bool fix_merge_after_open(TABLE_LIST *old_child_list, TABLE_LIST **old_last, ...@@ -4367,6 +4367,11 @@ bool fix_merge_after_open(TABLE_LIST *old_child_list, TABLE_LIST **old_last,
prelocking it won't do such precaching and will simply reuse table list prelocking it won't do such precaching and will simply reuse table list
which is already built. which is already built.
If any table has a trigger and start->trg_event_map is non-zero
the final lock will end up in thd->locked_tables, otherwise, the
lock will be placed in thd->lock. See also comments in
st_lex::set_trg_event_type_for_tables().
RETURN RETURN
0 - OK 0 - OK
-1 - error -1 - error
...@@ -4579,7 +4584,7 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags) ...@@ -4579,7 +4584,7 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags)
process its triggers since they never will be activated. process its triggers since they never will be activated.
*/ */
if (!thd->prelocked_mode && !thd->lex->requires_prelocking() && if (!thd->prelocked_mode && !thd->lex->requires_prelocking() &&
tables->table->triggers && tables->trg_event_map && tables->table->triggers &&
tables->lock_type >= TL_WRITE_ALLOW_WRITE) tables->lock_type >= TL_WRITE_ALLOW_WRITE)
{ {
if (!query_tables_last_own) if (!query_tables_last_own)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment