Commit 21cc7d5a authored by Andrei Elkin's avatar Andrei Elkin

Bug#38205 Row-based Replication (RBR) causes inconsistencies: HA_ERR_FOUND_DUP

Bug#319  if while a non-transactional slave is replicating a transaction possible problem 

It is impossible to roll back a mixed engines transaction when one of the engine is
non-transaction. In replication that fact is crucial because the slave can not safely
re-apply a transction that was interrupted with STOP SLAVE.

Fixed with making STOP SLAVE not be effective immediately in the case the current
group of replication events has modified a non-transaction table. In order for slave to leave
either the group needs finishing or the user issues KILL QUERY|CONNECTION slave_thread_id.


mysql-test/suite/bugs/r/rpl_bug38205.result:
  bug#38205 non-deterministic part of tests results.
mysql-test/suite/bugs/t/rpl_bug38205.test:
  bug#38205 non-deterministic part of tests.
mysql-test/suite/rpl/r/rpl_start_stop_slave.result:
  bug#38205 deterministic part of tests results.
mysql-test/suite/rpl/t/rpl_start_stop_slave-slave.opt:
  increasing `innodb_lock_wait_timeout' to make the test pass on slow env w/o
  timeout expired issue.
mysql-test/suite/rpl/t/rpl_start_stop_slave.test:
  bug#38205 deterministic part of tests.
sql/log_event.cc:
  Augmenting row-based events applying with the notion of 
  thd->transaction.{all,stmt}.modified_non_trans_table.
  The pair is set and reset according to its specification
  for the mixed transaction processing.
  Particualry, once `modified_non_trans_table' is set in the row-events
  processing loop, it will remain till the commit of the transaction.
sql/slave.cc:
  Consulting `thd->transaction.all.modified_non_trans_table' to decide
  whether to terminate by the sql thread or to continue even though
  the sql thread might have been STOP-ed (rli->abort_slave).
parent 58339038
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
begin;
insert into t1i values (5);
begin;
insert into t1i values (4);
insert into t2m values (1);
update t1i set n = 5 where n = 4;
commit;
zero
0
*** kill sql thread ***
rollback;
*** sql thread is *not* running: No ***
*** the prove: the killed slave has not finished the current transaction ***
three
3
one
1
zero
0
delete from t2m;
start slave sql_thread;
delete from t1i;
delete from t2m;
begin;
insert into t1i values (5);
begin;
insert into t1i values (4);
update t1i set n = 5 where n = 4;
commit;
zero
0
stop slave sql_thread;
rollback;
*** sql thread is running: No ***
*** the prove: the stopped slave has rolled back the current transaction ***
zero
0
zero
0
one
1
start slave sql_thread;
drop table t1i, t2m;
#
# Bug #38205 Row-based Replication (RBR) causes inconsistencies: HA_ERR_FOUND_DUPP_KEY
#
# Verifying the fact that STOP SLAVE in the middle of a group execution waits
# for the end of the group before the slave sql thread will stop.
# The patch refines STOP SLAVE to not interrupt a transaction or other type of
# the replication events group (the part I).
# Killing the sql thread continues to provide a "hard" stop (the part II).
#
# Non-deterministic tests
#
source include/master-slave.inc;
source include/have_innodb.inc;
#
# Part II, killed sql slave leaves instantly
#
# A. multi-statement transaction as the replication group
connection master;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
sync_slave_with_master;
connection master;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
sync_slave_with_master;
#
# todo: first challenge is to find out the SQL thread id
# the following is not fully reliable
#
let $id=`SELECT id from information_schema.processlist where user like 'system user' and state like '%Has read all relay log%' or user like 'system user' and state like '%Reading event from the relay log%'`;
connection slave;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
insert into t2m values (1); # non-ta update
update t1i set n = 5 where n = 4; # to block at. can't be played with killed
commit;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave;
# slave sql thread must be locked out by the conn `slave' explicit lock
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
connection slave1;
let $count= 1;
let $table= t2m;
source include/wait_until_rows_count.inc;
#
# todo: may fail as said above
#
--echo *** kill sql thread ***
--disable_query_log
eval kill connection $id;
--enable_query_log
connection slave;
rollback; # release the sql thread
connection slave1;
source include/wait_for_slave_sql_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is *not* running: $sql_status ***
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
connection slave;
--echo *** the prove: the killed slave has not finished the current transaction ***
--disable_query_log
select count(*) as three from t1i;
eval select $pos1_master > $pos1_slave as one;
eval select $pos1_slave - $pos0_slave as zero;
--enable_query_log
delete from t2m; # remove the row to be able to replay
start slave sql_thread;
#
# Part I: B The homogenous transaction remains interuptable in between
#
connection master;
delete from t1i;
delete from t2m;
sync_slave_with_master;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
update t1i set n = 5 where n = 4; # to block at. not to be played
commit;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave1;
# slave sql can't advance as must be locked by the conn `slave' trans
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
#
# the replicated trans is blocked by the slave's local.
# However, it's not easy to catch the exact moment when it happens.
# The test issues sleep which makes the test either non-deterministic or
# wasting too much time.
#
--sleep 3
send stop slave sql_thread;
connection slave;
rollback; # release the sql thread
connection slave1;
reap;
source include/wait_for_slave_sql_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is running: $sql_status ***
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--echo *** the prove: the stopped slave has rolled back the current transaction ***
--disable_query_log
select count(*) as zero from t1i;
eval select $pos0_master - $pos0_slave as zero;
eval select $pos1_master > $pos0_slave as one;
--enable_query_log
start slave sql_thread;
# clean-up
connection master;
drop table t1i, t2m;
sync_slave_with_master;
......@@ -10,3 +10,31 @@ start slave;
stop slave io_thread;
start slave io_thread;
drop table t1;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
begin;
insert into t1i values (5);
begin;
insert into t1i values (4);
insert into t2m values (1);
insert into t1i values (5);
commit;
zero
0
stop slave;
rollback;
*** sql thread is running: No ***
*** the prove: the stopped slave has finished the current transaction ***
five
5
zero
0
one
1
start slave;
drop table t1i, t2m;
source include/master-slave.inc;
source include/have_innodb.inc;
#
# Bug#6148 ()
......@@ -35,4 +36,88 @@ save_master_pos;
connection slave;
sync_with_master;
# End of 4.1 tests
#
# Bug #38205 Row-based Replication (RBR) causes inconsistencies...
#
# Verifying that STOP SLAVE does not interrupt excution of a group
# execution of events if the group can not roll back.
# Killing the sql thread continues to provide a "hard" stop (the
# part II, moved to the bugs suite as it's hard to make it
# deterministic with KILL).
#
#
# Part I. The being stopped sql thread finishes first the current group of
# events if the group contains an event on a non-transaction table.
connection master;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
sync_slave_with_master;
connection slave;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
insert into t2m values (1); # non-ta update to process
insert into t1i values (5); # to block at. to be played with stopped
commit;
connection slave;
# slave sql thread must be locked out by the conn `slave' explicit lock
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
connection slave1;
let $count= 1;
let $table= t2m;
source include/wait_until_rows_count.inc;
send stop slave;
connection slave;
rollback; # release the sql thread
connection slave1;
reap;
source include/wait_for_slave_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is running: $sql_status ***
connection master;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave;
source include/wait_for_slave_sql_to_stop.inc;
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--echo *** the prove: the stopped slave has finished the current transaction ***
--disable_query_log
select count(*) as five from t1i;
eval select $pos1_master - $pos1_slave as zero;
eval select $pos1_slave > $pos0_slave as one;
--enable_query_log
start slave;
# clean-up
connection master;
drop table t1i, t2m;
sync_slave_with_master;
# End of tests
......@@ -7114,7 +7114,12 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
*/
lex_start(thd);
mysql_reset_thd_for_next_command(thd);
/*
The current statement is just about to begin and
has not yet modified anything. Note, all.modified is reset
by mysql_reset_thd_for_next_command.
*/
thd->transaction.stmt.modified_non_trans_table= FALSE;
/*
Check if the slave is set to use SBR. If so, it should switch
to using RBR until the end of the "statement", i.e., next
......@@ -7217,6 +7222,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
if (table)
{
bool transactional_table= table->file->has_transactions();
/*
table == NULL means that this table should not be replicated
(this was set up by Table_map_log_event::do_apply_event()
......@@ -7348,6 +7354,9 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
m_curr_row= m_curr_row_end;
if (error == 0 && !transactional_table)
thd->transaction.all.modified_non_trans_table=
thd->transaction.stmt.modified_non_trans_table= TRUE;
} // row processing loop
DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
......
......@@ -687,6 +687,9 @@ static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
if (abort_loop || thd->killed || rli->abort_slave)
{
if (rli->abort_slave && rli->is_in_group() &&
thd->transaction.all.modified_non_trans_table)
DBUG_RETURN(0);
/*
If we are in an unsafe situation (stopping could corrupt replication),
we give one minute to the slave SQL thread of grace before really
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment