BUG#13861 - START SLAVE UNTIL may stop 1 evnt too late if

            log-slave-updates and circul repl

Slave SQL thread may execute one extra event when there are events
skipped by slave I/O thread (e.g. originated by the same server).
Whereas it was requested not to do so by the UNTIL condition.

This happens because we compare with the end position of previously
executed event. This is fine when there are no skipped by slave I/O
thread events, as end position of previous event equals to start
position of to be executed event. Otherwise this position equals to
start position of skipped event.

This is fixed by:
- reading the event to be executed before checking if the until condition
  is satisfied.
- comparing the start position of the event to be executed. Since we do
  not have the start position available, we compute it by subtracting
  event length from end position (which is available).
- if there are no events on the event queue at the slave sql starting
  time, that meet until condition, we stop immediately, as in this
  case we do not want to wait for next event.
parent b620694b
......@@ -8,15 +8,56 @@ reset master;
change master to master_host="127.0.0.1",master_port=SLAVE_PORT,master_user="root";
start slave;
create table t1 (n int);
stop slave;
create table t2 (n int);
show tables;
Tables_in_test
t1
t2
create table t3 (n int) engine=innodb;
set @a=1;
insert into t3 values(@a);
begin;
insert into t3 values(2);
insert into t3 values(3);
commit;
insert into t3 values(4);
start slave until master_log_file="slave-bin.000001",master_log_pos=195;
Warnings:
Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart
select master_pos_wait("slave-bin.000001",137);
master_pos_wait("slave-bin.000001",137)
0
show tables;
Tables_in_test
t1
t2
start slave until master_log_file="slave-bin.000001",master_log_pos=438;
Warnings:
Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart
select * from t3;
n
1
start slave until master_log_file="slave-bin.000001",master_log_pos=663;
Warnings:
Note 1278 It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart
select * from t3;
n
1
2
3
start slave;
create table t4 (n int);
create table t5 (n int);
create table t6 (n int);
show tables;
Tables_in_test
t1
t2
t3
t4
t5
t6
stop slave;
reset slave;
drop table t1,t4,t5,t6;
drop table t1,t2,t3,t4,t5,t6;
......@@ -7,6 +7,7 @@
# It also will test BUG#13861.
source include/master-slave.inc;
source include/have_innodb.inc;
# set up "dual head"
......@@ -30,45 +31,64 @@ save_master_pos;
connection master;
sync_with_master;
# Now test BUG#13861. This will be enabled when Guilhem fixes this
# bug.
#
# BUG#13861 - START SLAVE UNTIL may stop 1 evnt too late if
# log-slave-updates and circul repl
#
stop slave;
# stop slave
create table t2 (n int); # create one ignored event
# create table t2 (n int); # create one ignored event
save_master_pos;
connection slave;
sync_with_master;
# save_master_pos;
# connection slave;
# sync_with_master;
connection slave;
# connection slave;
show tables;
# show tables;
save_master_pos;
# save_master_pos;
create table t3 (n int) engine=innodb;
set @a=1;
insert into t3 values(@a);
begin;
insert into t3 values(2);
insert into t3 values(3);
commit;
insert into t3 values(4);
# create table t3 (n int);
# connection master;
connection master;
# bug is that START SLAVE UNTIL may stop too late, we test that by
# asking it to stop before creation of t3.
# start slave until master_log_file="slave-bin.000001",master_log_pos=195;
start slave until master_log_file="slave-bin.000001",master_log_pos=195;
# wait until it's started (the position below is the start of "CREATE
# TABLE t2") (otherwise wait_for_slave_to_stop may return at once)
# select master_pos_wait("slave-bin.000001",137);
select master_pos_wait("slave-bin.000001",137);
# wait_for_slave_to_stop;
wait_for_slave_to_stop;
# then BUG#13861 causes t3 to show up below (because stopped too
# late).
# show tables;
show tables;
# ensure that we do not break set @a=1; insert into t3 values(@a);
start slave until master_log_file="slave-bin.000001",master_log_pos=438;
wait_for_slave_to_stop;
select * from t3;
# ensure that we do not break transaction
start slave until master_log_file="slave-bin.000001",master_log_pos=663;
wait_for_slave_to_stop;
select * from t3;
# start slave;
start slave;
# BUG#13023 is that Exec_master_log_pos may stay too low "forever":
......@@ -99,7 +119,7 @@ show tables;
stop slave;
reset slave;
drop table t1,t4,t5,t6; # add t2 and t3 later
drop table t1,t2,t3,t4,t5,t6;
save_master_pos;
connection slave;
......
......@@ -373,6 +373,7 @@ Log_event::Log_event(const char* buf,
#endif
when = uint4korr(buf);
server_id = uint4korr(buf + SERVER_ID_OFFSET);
data_written= uint4korr(buf + EVENT_LEN_OFFSET);
if (description_event->binlog_version==1)
{
log_pos= 0;
......@@ -405,7 +406,7 @@ Log_event::Log_event(const char* buf,
binlog, so which will cause problems if the user uses this value
in CHANGE MASTER).
*/
log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
log_pos+= data_written;
}
DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
......
......@@ -3115,6 +3115,11 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
Check if condition stated in UNTIL clause of START SLAVE is reached.
SYNOPSYS
st_relay_log_info::is_until_satisfied()
master_beg_pos position of the beginning of to be executed event
(not log_pos member of the event that points to the
beginning of the following event)
DESCRIPTION
Checks if UNTIL condition is reached. Uses caching result of last
comparison of current log file name and target log file name. So cached
......@@ -3139,7 +3144,7 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
false - condition not met
*/
bool st_relay_log_info::is_until_satisfied()
bool st_relay_log_info::is_until_satisfied(my_off_t master_beg_pos)
{
const char *log_name;
ulonglong log_pos;
......@@ -3149,7 +3154,7 @@ bool st_relay_log_info::is_until_satisfied()
if (until_condition == UNTIL_MASTER_POS)
{
log_name= group_master_log_name;
log_pos= group_master_log_pos;
log_pos= master_beg_pos;
}
else
{ /* until_condition == UNTIL_RELAY_POS */
......@@ -3228,28 +3233,6 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
wait for something for example inside of next_event().
*/
pthread_mutex_lock(&rli->data_lock);
/*
This tests if the position of the end of the last previous executed event
hits the UNTIL barrier.
We would prefer to test if the position of the start (or possibly) end of
the to-be-read event hits the UNTIL barrier, this is different if there
was an event ignored by the I/O thread just before (BUG#13861 to be
fixed).
*/
if (rli->until_condition!=RELAY_LOG_INFO::UNTIL_NONE &&
rli->is_until_satisfied())
{
char buf[22];
sql_print_information("Slave SQL thread stopped because it reached its"
" UNTIL position %s", llstr(rli->until_pos(), buf));
/*
Setting abort_slave flag because we do not want additional message about
error in query execution to be printed.
*/
rli->abort_slave= 1;
pthread_mutex_unlock(&rli->data_lock);
return 1;
}
Log_event * ev = next_event(rli);
......@@ -3266,6 +3249,27 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
int type_code = ev->get_type_code();
int exec_res;
/*
This tests if the position of the beginning of the current event
hits the UNTIL barrier.
*/
if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE &&
rli->is_until_satisfied((thd->options & OPTION_BEGIN || !ev->log_pos) ?
rli->group_master_log_pos :
ev->log_pos - ev->data_written))
{
char buf[22];
sql_print_information("Slave SQL thread stopped because it reached its"
" UNTIL position %s", llstr(rli->until_pos(), buf));
/*
Setting abort_slave flag because we do not want additional message about
error in query execution to be printed.
*/
rli->abort_slave= 1;
pthread_mutex_unlock(&rli->data_lock);
delete ev;
return 1;
}
/*
Queries originating from this server must be skipped.
Low-level events (Format_desc, Rotate, Stop) from this server
......@@ -3977,6 +3981,22 @@ Slave SQL thread aborted. Can't execute init_slave query");
}
}
/*
First check until condition - probably there is nothing to execute. We
do not want to wait for next event in this case.
*/
pthread_mutex_lock(&rli->data_lock);
if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE &&
rli->is_until_satisfied(rli->group_master_log_pos))
{
char buf[22];
sql_print_information("Slave SQL thread stopped because it reached its"
" UNTIL position %s", llstr(rli->until_pos(), buf));
pthread_mutex_unlock(&rli->data_lock);
goto err;
}
pthread_mutex_unlock(&rli->data_lock);
/* Read queries from the IO/THREAD until this thread is killed */
while (!sql_slave_killed(thd,rli))
......
......@@ -348,7 +348,7 @@ typedef struct st_relay_log_info
void close_temporary_tables();
/* Check if UNTIL condition is satisfied. See slave.cc for more. */
bool is_until_satisfied();
bool is_until_satisfied(my_off_t master_beg_pos);
inline ulonglong until_pos()
{
return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment