Commit 58d679e3 authored by unknown's avatar unknown

- Fix for bug 651: now a dying SQL slave threads wakes up any waiting MASTER_POS_WAIT().

Could not add a testcase for this: if the test goes into a MASTER_POS_WAIT, it waits
until this terminates (even doing "connection other_con" to launch "stop slave" is blocked).
- In MASTER_POS_WAIT() don't test if the I/O slave is running, but if the SQL thread
is running.
- Some DBUG info for this bugfix.


sql/slave.cc:
  Fix for bug 651: now a dying SQL slave threads wakes up any waiting MASTER_POS_WAIT().
  In MASTER_POS_WAIT() don't test if the I/O slave is running, but if the SQL thread
  is running.
  Some DBUG info.
parent 16ec9365
...@@ -289,6 +289,9 @@ err: ...@@ -289,6 +289,9 @@ err:
pthread_cond_broadcast(&rli->data_cond); pthread_cond_broadcast(&rli->data_cond);
if (need_data_lock) if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock); pthread_mutex_unlock(&rli->data_lock);
/* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */
pthread_mutex_unlock(log_lock); pthread_mutex_unlock(log_lock);
DBUG_RETURN ((*errmsg) ? 1 : 0); DBUG_RETURN ((*errmsg) ? 1 : 0);
} }
...@@ -1778,6 +1781,13 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1778,6 +1781,13 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
the master info. To catch this, these commands the master info. To catch this, these commands
modify abort_pos_wait ; we just monitor abort_pos_wait modify abort_pos_wait ; we just monitor abort_pos_wait
and see if it has changed. and see if it has changed.
Why do we have this mechanism instead of simply monitoring slave_running in
the loop (we do this too), as CHANGE MASTER/RESET SLAVE require that the
SQL thread be stopped? This is in case
STOP SLAVE;CHANGE MASTER/RESET SLAVE; START SLAVE;
happens very quickly between the moment pthread_cond_wait() wakes up and
the while() is evaluated: in that case slave_running is again 1 when the
while() is evaluated.
*/ */
init_abort_pos_wait= abort_pos_wait; init_abort_pos_wait= abort_pos_wait;
...@@ -1814,7 +1824,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1814,7 +1824,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
//"compare and wait" main loop //"compare and wait" main loop
while (!thd->killed && while (!thd->killed &&
init_abort_pos_wait == abort_pos_wait && init_abort_pos_wait == abort_pos_wait &&
mi->slave_running) /*
formerly we tested mi->slave_running, but what we care about is
rli->slave_running (because this concerns the SQL thread, while
mi->slave_running concerns the I/O thread).
*/
slave_running)
{ {
bool pos_reached; bool pos_reached;
int cmp_result= 0; int cmp_result= 0;
...@@ -1852,6 +1867,10 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1852,6 +1867,10 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
DBUG_PRINT("info",("Waiting for master update")); DBUG_PRINT("info",("Waiting for master update"));
const char* msg = thd->enter_cond(&data_cond, &data_lock, const char* msg = thd->enter_cond(&data_cond, &data_lock,
"Waiting for master update"); "Waiting for master update");
/*
We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
will wake us up.
*/
if (timeout > 0) if (timeout > 0)
{ {
/* /*
...@@ -1869,6 +1888,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1869,6 +1888,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
} }
else else
pthread_cond_wait(&data_cond, &data_lock); pthread_cond_wait(&data_cond, &data_lock);
DBUG_PRINT("info",("Got signal of master update"));
thd->exit_cond(msg); thd->exit_cond(msg);
if (error == ETIMEDOUT || error == ETIME) if (error == ETIMEDOUT || error == ETIME)
{ {
...@@ -1877,6 +1897,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, ...@@ -1877,6 +1897,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
} }
error=0; error=0;
event_count++; event_count++;
DBUG_PRINT("info",("Testing if killed or SQL thread not running"));
} }
err: err:
...@@ -1885,11 +1906,11 @@ err: ...@@ -1885,11 +1906,11 @@ err:
improper_arguments: %d timed_out: %d", improper_arguments: %d timed_out: %d",
(int) thd->killed, (int) thd->killed,
(int) (init_abort_pos_wait != abort_pos_wait), (int) (init_abort_pos_wait != abort_pos_wait),
(int) mi->slave_running, (int) slave_running,
(int) (error == -2), (int) (error == -2),
(int) (error == -1))); (int) (error == -1)));
if (thd->killed || init_abort_pos_wait != abort_pos_wait || if (thd->killed || init_abort_pos_wait != abort_pos_wait ||
!mi->slave_running) !slave_running)
{ {
error= -2; error= -2;
} }
...@@ -2596,8 +2617,15 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ ...@@ -2596,8 +2617,15 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
VOID(pthread_mutex_unlock(&LOCK_thread_count)); VOID(pthread_mutex_unlock(&LOCK_thread_count));
thd->proc_info = "Waiting for slave mutex on exit"; thd->proc_info = "Waiting for slave mutex on exit";
pthread_mutex_lock(&rli->run_lock); pthread_mutex_lock(&rli->run_lock);
/* We need data_lock, at least to wake up any waiting master_pos_wait() */
pthread_mutex_lock(&rli->data_lock);
DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
rli->slave_running = 0; /* When master_pos_wait() wakes up it will check this and terminate */
rli->slave_running= 0;
/* Wake up master_pos_wait() */
pthread_mutex_unlock(&rli->data_lock);
DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
pthread_cond_broadcast(&rli->data_cond);
rli->save_temporary_tables = thd->temporary_tables; rli->save_temporary_tables = thd->temporary_tables;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment