Commit 00e7ec42 authored by unknown's avatar unknown

Fix for:

Bug #4810 "deadlock with KILL when the victim was in a wait state"
(I included mutex unlock into exit_cond() for future safety)
and BUG#4827 "KILL while START SLAVE may lead to replication slave crash"


sql/lock.cc:
  we did exit_cond() before unlock(LOCK_open), which led to deadlocks with THD::awake(). Fixing this.
sql/log.cc:
  mutex unlock is now included in exit_cond()
sql/repl_failsafe.cc:
  we did exit_cond() before unlock(LOCK_rpl_status), which led to deadlocks with THD::awake(). Fixing this.
sql/slave.cc:
  we did exit_cond() before unlock(cond_lock), which led to deadlocks with THD::awake(). Fixing this.
  Fixing also that if killed while waiting for slave thread to start, we don't release the mutex
  (that caused a double release of the mutex => crash).
sql/sql_class.h:
  comments about exit_cond()/enter_cond().
  Mutex unlock is now included in exit_cond() so that it's always done in the good order.
sql/sql_table.cc:
  unlock is now included in exit_cond().
parent 42ed0103
...@@ -692,15 +692,14 @@ bool lock_global_read_lock(THD *thd) ...@@ -692,15 +692,14 @@ bool lock_global_read_lock(THD *thd)
while (protect_against_global_read_lock && !thd->killed) while (protect_against_global_read_lock && !thd->killed)
pthread_cond_wait(&COND_refresh, &LOCK_open); pthread_cond_wait(&COND_refresh, &LOCK_open);
waiting_for_read_lock--; waiting_for_read_lock--;
thd->exit_cond(old_message);
if (thd->killed) if (thd->killed)
{ {
(void) pthread_mutex_unlock(&LOCK_open); thd->exit_cond(old_message);
DBUG_RETURN(1); DBUG_RETURN(1);
} }
thd->global_read_lock=1; thd->global_read_lock=1;
global_read_lock++; global_read_lock++;
(void) pthread_mutex_unlock(&LOCK_open); thd->exit_cond(old_message);
} }
DBUG_RETURN(0); DBUG_RETURN(0);
} }
...@@ -721,11 +720,12 @@ void unlock_global_read_lock(THD *thd) ...@@ -721,11 +720,12 @@ void unlock_global_read_lock(THD *thd)
bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh) bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh)
{ {
const char *old_message; const char *old_message;
bool result=0; bool result= 0, need_exit_cond;
DBUG_ENTER("wait_if_global_read_lock"); DBUG_ENTER("wait_if_global_read_lock");
LINT_INIT(old_message);
(void) pthread_mutex_lock(&LOCK_open); (void) pthread_mutex_lock(&LOCK_open);
if (global_read_lock) if (need_exit_cond= (bool)global_read_lock)
{ {
if (thd->global_read_lock) // This thread had the read locks if (thd->global_read_lock) // This thread had the read locks
{ {
...@@ -740,10 +740,12 @@ bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh) ...@@ -740,10 +740,12 @@ bool wait_if_global_read_lock(THD *thd, bool abort_on_refresh)
(void) pthread_cond_wait(&COND_refresh,&LOCK_open); (void) pthread_cond_wait(&COND_refresh,&LOCK_open);
if (thd->killed) if (thd->killed)
result=1; result=1;
thd->exit_cond(old_message);
} }
if (!abort_on_refresh && !result) if (!abort_on_refresh && !result)
protect_against_global_read_lock++; protect_against_global_read_lock++;
if (unlikely(need_exit_cond)) // global read locks are rare
thd->exit_cond(old_message);
else
pthread_mutex_unlock(&LOCK_open); pthread_mutex_unlock(&LOCK_open);
DBUG_RETURN(result); DBUG_RETURN(result);
} }
......
...@@ -1533,12 +1533,8 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length, ...@@ -1533,12 +1533,8 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
NOTES NOTES
One must have a lock on LOCK_log before calling this function. One must have a lock on LOCK_log before calling this function.
This lock will be freed before return! This lock will be freed before return! That's required by
THD::enter_cond() (see NOTES in sql_class.h).
The reason for the above is that for enter_cond() / exit_cond() to
work the mutex must be got before enter_cond() but releases before
exit_cond().
If you don't do it this way, you will get a deadlock in THD::awake()
*/ */
...@@ -1551,7 +1547,6 @@ the I/O slave thread to update it" : ...@@ -1551,7 +1547,6 @@ the I/O slave thread to update it" :
"Has sent all binlog to slave; \ "Has sent all binlog to slave; \
waiting for binlog to be updated"); waiting for binlog to be updated");
pthread_cond_wait(&update_cond, &LOCK_log); pthread_cond_wait(&update_cond, &LOCK_log);
pthread_mutex_unlock(&LOCK_log); // See NOTES
thd->exit_cond(old_msg); thd->exit_cond(old_msg);
} }
......
...@@ -584,6 +584,8 @@ pthread_handler_decl(handle_failsafe_rpl,arg) ...@@ -584,6 +584,8 @@ pthread_handler_decl(handle_failsafe_rpl,arg)
THD *thd = new THD; THD *thd = new THD;
thd->thread_stack = (char*)&thd; thd->thread_stack = (char*)&thd;
MYSQL* recovery_captain = 0; MYSQL* recovery_captain = 0;
const char* msg;
pthread_detach_this_thread(); pthread_detach_this_thread();
if (init_failsafe_rpl_thread(thd) || !(recovery_captain=mc_mysql_init(0))) if (init_failsafe_rpl_thread(thd) || !(recovery_captain=mc_mysql_init(0)))
{ {
...@@ -591,11 +593,11 @@ pthread_handler_decl(handle_failsafe_rpl,arg) ...@@ -591,11 +593,11 @@ pthread_handler_decl(handle_failsafe_rpl,arg)
goto err; goto err;
} }
pthread_mutex_lock(&LOCK_rpl_status); pthread_mutex_lock(&LOCK_rpl_status);
msg= thd->enter_cond(&COND_rpl_status,
&LOCK_rpl_status, "Waiting for request");
while (!thd->killed && !abort_loop) while (!thd->killed && !abort_loop)
{ {
bool break_req_chain = 0; bool break_req_chain = 0;
const char* msg = thd->enter_cond(&COND_rpl_status,
&LOCK_rpl_status, "Waiting for request");
pthread_cond_wait(&COND_rpl_status, &LOCK_rpl_status); pthread_cond_wait(&COND_rpl_status, &LOCK_rpl_status);
thd->proc_info="Processing request"; thd->proc_info="Processing request";
while (!break_req_chain) while (!break_req_chain)
...@@ -613,9 +615,8 @@ pthread_handler_decl(handle_failsafe_rpl,arg) ...@@ -613,9 +615,8 @@ pthread_handler_decl(handle_failsafe_rpl,arg)
break; break;
} }
} }
thd->exit_cond(msg);
} }
pthread_mutex_unlock(&LOCK_rpl_status); thd->exit_cond(msg);
err: err:
if (recovery_captain) if (recovery_captain)
mc_mysql_close(recovery_captain); mc_mysql_close(recovery_captain);
......
...@@ -582,7 +582,7 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock, ...@@ -582,7 +582,7 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
pthread_mutex_unlock(start_lock); pthread_mutex_unlock(start_lock);
DBUG_RETURN(ER_SLAVE_THREAD); DBUG_RETURN(ER_SLAVE_THREAD);
} }
if (start_cond && cond_lock) if (start_cond && cond_lock) // caller has cond_lock
{ {
THD* thd = current_thd; THD* thd = current_thd;
while (start_id == *slave_run_id) while (start_id == *slave_run_id)
...@@ -592,13 +592,11 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock, ...@@ -592,13 +592,11 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
"Waiting for slave thread to start"); "Waiting for slave thread to start");
pthread_cond_wait(start_cond,cond_lock); pthread_cond_wait(start_cond,cond_lock);
thd->exit_cond(old_msg); thd->exit_cond(old_msg);
pthread_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
if (thd->killed) if (thd->killed)
{
pthread_mutex_unlock(cond_lock);
DBUG_RETURN(ER_SERVER_SHUTDOWN); DBUG_RETURN(ER_SERVER_SHUTDOWN);
} }
} }
}
if (start_lock) if (start_lock)
pthread_mutex_unlock(start_lock); pthread_mutex_unlock(start_lock);
DBUG_RETURN(0); DBUG_RETURN(0);
...@@ -1561,7 +1559,6 @@ thread to free enough relay log space"); ...@@ -1561,7 +1559,6 @@ thread to free enough relay log space");
!rli->ignore_log_space_limit) !rli->ignore_log_space_limit)
pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock); pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
thd->exit_cond(save_proc_info); thd->exit_cond(save_proc_info);
pthread_mutex_unlock(&rli->log_space_lock);
DBUG_RETURN(slave_killed); DBUG_RETURN(slave_killed);
} }
...@@ -1965,6 +1962,9 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1965,6 +1962,9 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
(long) timeout)); (long) timeout));
pthread_mutex_lock(&data_lock); pthread_mutex_lock(&data_lock);
const char *msg= thd->enter_cond(&data_cond, &data_lock,
"Waiting for the SQL slave thread to "
"advance position");
/* /*
This function will abort when it notices that some CHANGE MASTER or This function will abort when it notices that some CHANGE MASTER or
RESET MASTER has changed the master info. RESET MASTER has changed the master info.
...@@ -2063,9 +2063,6 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -2063,9 +2063,6 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
//wait for master update, with optional timeout. //wait for master update, with optional timeout.
DBUG_PRINT("info",("Waiting for master update")); DBUG_PRINT("info",("Waiting for master update"));
const char* msg = thd->enter_cond(&data_cond, &data_lock,
"Waiting for the SQL slave thread to \
advance position");
/* /*
We are going to pthread_cond_(timed)wait(); if the SQL thread stops it We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
will wake us up. will wake us up.
...@@ -2087,8 +2084,7 @@ advance position"); ...@@ -2087,8 +2084,7 @@ advance position");
} }
else else
pthread_cond_wait(&data_cond, &data_lock); pthread_cond_wait(&data_cond, &data_lock);
DBUG_PRINT("info",("Got signal of master update")); DBUG_PRINT("info",("Got signal of master update or timed out"));
thd->exit_cond(msg);
if (error == ETIMEDOUT || error == ETIME) if (error == ETIMEDOUT || error == ETIME)
{ {
error= -1; error= -1;
...@@ -2100,7 +2096,7 @@ advance position"); ...@@ -2100,7 +2096,7 @@ advance position");
} }
err: err:
pthread_mutex_unlock(&data_lock); thd->exit_cond(msg);
DBUG_PRINT("exit",("killed: %d abort: %d slave_running: %d \ DBUG_PRINT("exit",("killed: %d abort: %d slave_running: %d \
improper_arguments: %d timed_out: %d", improper_arguments: %d timed_out: %d",
(int) thd->killed, (int) thd->killed,
......
...@@ -537,12 +537,9 @@ public: ...@@ -537,12 +537,9 @@ public:
void awake(bool prepare_to_die); void awake(bool prepare_to_die);
/* /*
For enter_cond() / exit_cond() to work the mutex must be got before For enter_cond() / exit_cond() to work the mutex must be got before
enter_cond() but released before exit_cond() (in 4.1, assertions will soon enter_cond() (in 4.1 an assertion will soon ensure this); this mutex is
ensure this). Use must be: then released by exit_cond(). Use must be:
lock mutex; enter_cond(); ...; unlock mutex; exit_cond(). lock mutex; enter_cond(); your code; exit_cond().
If you don't do it this way, you will get a deadlock if another thread is
doing a THD::awake() on you.
*/ */
inline const char* enter_cond(pthread_cond_t *cond, pthread_mutex_t* mutex, inline const char* enter_cond(pthread_cond_t *cond, pthread_mutex_t* mutex,
const char* msg) const char* msg)
...@@ -555,6 +552,13 @@ public: ...@@ -555,6 +552,13 @@ public:
} }
inline void exit_cond(const char* old_msg) inline void exit_cond(const char* old_msg)
{ {
/*
Putting the mutex unlock in exit_cond() ensures that
mysys_var->current_mutex is always unlocked _before_ mysys_var->mutex is
locked (if that would not be the case, you'll get a deadlock if someone
does a THD::awake() on you).
*/
pthread_mutex_unlock(mysys_var->current_mutex);
pthread_mutex_lock(&mysys_var->mutex); pthread_mutex_lock(&mysys_var->mutex);
mysys_var->current_mutex = 0; mysys_var->current_mutex = 0;
mysys_var->current_cond = 0; mysys_var->current_cond = 0;
......
...@@ -1301,7 +1301,6 @@ static int mysql_admin_table(THD* thd, TABLE_LIST* tables, ...@@ -1301,7 +1301,6 @@ static int mysql_admin_table(THD* thd, TABLE_LIST* tables,
dropping_tables--; dropping_tables--;
} }
thd->exit_cond(old_message); thd->exit_cond(old_message);
pthread_mutex_unlock(&LOCK_open);
if (thd->killed) if (thd->killed)
goto err; goto err;
open_for_modify=0; open_for_modify=0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment