Commit d08b893b authored by Kristian Nielsen's avatar Kristian Nielsen

MDEV-6775: Wrong binlog order in parallel replication: Intermediate commit

The code in binlog group commit around wait_for_commit that controls commit
order, did the wakeup of subsequent commits early, as soon as a following
transaction is put into the group commit queue, but before any such commit has
actually taken place. This causes problems with too early wakeup of
transactions that need to wait for prior to commit, but do not take part in
the binlog group commit for one reason or the other.

This patch solves the problem, by moving the wakeup to happen only after the
binlog group commit is completed.

This requires a new solution to ensure that transactions that arrive later
than the leader are still able to participate in group commit. This patch
introduces a flag wait_for_commit::commit_started. When this is set, a waiter
can queue up itself in the group commit queue.

This way, effectively the wait_for_prior_commit() is skipped only for
transactions that participate in group commit, so that skipping the wait is
safe. Other transactions still wait as needed for correctness.
parent eec04fb4
......@@ -923,6 +923,55 @@ a
32
33
34
*** MDEV-6775: Wrong binlog order in parallel replication ***
DELETE FROM t4;
INSERT INTO t4 VALUES (1,NULL), (3,NULL), (4,4), (5, NULL), (6, 6);
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,inject_binlog_commit_before_get_LOCK_log";
SET @old_format=@@GLOBAL.binlog_format;
SET GLOBAL binlog_format=ROW;
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
SET @old_format= @@binlog_format;
SET binlog_format= statement;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
UPDATE t4 SET b=NULL WHERE a=6;
SET debug_sync='now WAIT_FOR master_queued1';
SET @old_format= @@binlog_format;
SET binlog_format= statement;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
DELETE FROM t4 WHERE b <= 3;
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
SET binlog_format= @old_format;
SET binlog_format= @old_format;
SET debug_sync='RESET';
SELECT * FROM t4 ORDER BY a;
a b
1 NULL
3 NULL
4 4
5 NULL
6 NULL
include/start_slave.inc
SET debug_sync= 'now WAIT_FOR waiting';
SELECT * FROM t4 ORDER BY a;
a b
1 NULL
3 NULL
4 4
5 NULL
6 NULL
SET debug_sync= 'now SIGNAL cont';
include/stop_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
SET GLOBAL binlog_format= @old_format;
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
......
......@@ -1466,6 +1466,75 @@ SET sql_slave_skip_counter= 1;
SELECT * FROM t2 WHERE a >= 30 ORDER BY a;
--echo *** MDEV-6775: Wrong binlog order in parallel replication ***
--connection server_1
# A bit tricky bug to reproduce. On the master, we binlog in statement-mode
# two transactions, an UPDATE followed by a DELETE. On the slave, we replicate
# with binlog-mode set to ROW, which means the DELETE, which modifies no rows,
# is not binlogged. Then we inject a wait in the group commit code on the
# slave, shortly before the actual commit of the UPDATE. The bug was that the
# DELETE could wake up from wait_for_prior_commit() before the commit of the
# UPDATE. So the test could see the slave position updated to after DELETE,
# while the UPDATE was still not visible.
DELETE FROM t4;
INSERT INTO t4 VALUES (1,NULL), (3,NULL), (4,4), (5, NULL), (6, 6);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,inject_binlog_commit_before_get_LOCK_log";
SET @old_format=@@GLOBAL.binlog_format;
SET GLOBAL binlog_format=ROW;
# Re-spawn the worker threads to be sure they pick up the new binlog format
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
--connection con1
SET @old_format= @@binlog_format;
SET binlog_format= statement;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
send UPDATE t4 SET b=NULL WHERE a=6;
--connection server_1
SET debug_sync='now WAIT_FOR master_queued1';
--connection con2
SET @old_format= @@binlog_format;
SET binlog_format= statement;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
send DELETE FROM t4 WHERE b <= 3;
--connection server_1
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
--connection con1
REAP;
SET binlog_format= @old_format;
--connection con2
REAP;
SET binlog_format= @old_format;
SET debug_sync='RESET';
--save_master_pos
SELECT * FROM t4 ORDER BY a;
--connection server_2
--source include/start_slave.inc
SET debug_sync= 'now WAIT_FOR waiting';
--sync_with_master
SELECT * FROM t4 ORDER BY a;
SET debug_sync= 'now SIGNAL cont';
# Re-spawn the worker threads to remove any DBUG injections or DEBUG_SYNC.
--source include/stop_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
SET GLOBAL binlog_format= @old_format;
SET GLOBAL slave_parallel_threads=0;
SET GLOBAL slave_parallel_threads=10;
--source include/start_slave.inc
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
......
This diff is collapsed.
......@@ -6342,6 +6342,7 @@ wait_for_commit::reinit()
opaque_pointer= NULL;
wakeup_error= 0;
wakeup_subsequent_commits_running= false;
commit_started= false;
#ifdef SAFE_MUTEX
/*
When using SAFE_MUTEX, the ordering between taking the LOCK_wait_commit
......
......@@ -1713,6 +1713,16 @@ struct wait_for_commit
on that function for details.
*/
bool wakeup_subsequent_commits_running;
/*
This flag can be set when a commit starts, but has not completed yet.
It is used by binlog group commit to allow a waiting transaction T2 to
join the group commit of an earlier transaction T1. When T1 has queued
itself for group commit, it will set the commit_started flag. Then when
T2 becomes ready to commit and needs to wait for T1 to commit first, T2
can queue itself before waiting, and thereby participate in the same
group commit as T1.
*/
bool commit_started;
void register_wait_for_prior_commit(wait_for_commit *waitee);
int wait_for_prior_commit(THD *thd)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment