Commit 2c2478b8 authored by unknown's avatar unknown

MDEV-5804: If same GTID is received on multiple master connections in...

MDEV-5804: If same GTID is received on multiple master connections in multi-source replication, the event is double-executed causing corruption or replication failure

Before, the arrival of the same GTID twice in multi-source replication
would cause the event to be applied twice or, in GTID strict mode, an error.

Keep that behaviour as the default, but add an option --gtid-ignore-duplicates
which allows duplicates to be handled correctly, ignoring all but the first.
This relies on the user ensuring correct configuration so that
sequence numbers are strictly increasing within each replication
domain; then duplicates can be detected simply by comparing the
sequence numbers against what is already applied.

Only one master connection (but possibly multiple parallel worker
threads within that connection) is allowed to apply events within
one replication domain at a time; any other connection that
receives a GTID in the same domain either discards it (if it is
already applied) or waits for the other connection to not have
any events to apply.

Intermediate patch, as proof-of-concept for testing. The main limitation
is that currently it is only implemented for parallel replication,
@@slave_parallel_threads > 0.
parent 5c31e79f
!include my.cnf
[mysqld.1]
log-slave-updates
loose-innodb
[mysqld.2]
log-slave-updates
loose-innodb
[mysqld.3]
log-bin=server3-bin
log-slave-updates
loose-innodb
[mysqld.4]
server-id=4
log-bin=server4-bin
log-slave-updates
loose-innodb
[ENV]
SERVER_MYPORT_4= @mysqld.4.port
SERVER_MYSOCK_4= @mysqld.4.socket
*** Test all-to-all replication with --gtid-ignore-duplicates ***
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 1;
SET SESSION gtid_domain_id= 1;
CHANGE MASTER 'b2a' TO master_port=MYPORT_2, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
CHANGE MASTER 'c2a' TO master_port=MYPORT_3, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'b2a';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = 'c2a';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = '';
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 2;
SET SESSION gtid_domain_id= 2;
CHANGE MASTER 'a2b' TO master_port=MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
CHANGE MASTER 'c2b' TO master_port=MYPORT_3, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2b';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = 'c2b';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = '';
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 3;
SET SESSION gtid_domain_id= 3;
CHANGE MASTER 'a2c' TO master_port=MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
CHANGE MASTER 'b2c' TO master_port=MYPORT_2, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2c';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = 'b2c';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = '';
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 1;
SET SESSION gtid_domain_id= 1;
CHANGE MASTER 'a2d' TO master_port=MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2d';
START SLAVE;
include/wait_for_slave_to_start.inc
set default_master_connection = '';
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
BEGIN;
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
COMMIT;
INSERT INTO t1 VALUES (4), (5);
INSERT INTO t1 VALUES (6);
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
6
include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
6
include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
6
include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
6
INSERT INTO t1 VALUES (10);
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
10
STOP SLAVE "c2b";
SET default_master_connection = "c2b";
include/wait_for_slave_to_stop.inc
STOP SLAVE "a2b";
SET default_master_connection = "a2b";
include/wait_for_slave_to_stop.inc
INSERT INTO t1 VALUES (11);
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
10
11
SET default_master_connection = "b2a";
STOP SLAVE;
include/wait_for_slave_to_stop.inc
INSERT INTO t1 VALUES (12);
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
10
12
include/save_master_gtid.inc
START SLAVE "b2a";
SET default_master_connection = "b2a";
include/wait_for_slave_to_start.inc
include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
10
11
12
START SLAVE "c2b";
SET default_master_connection = "c2b";
include/wait_for_slave_to_start.inc
START SLAVE "a2b";
SET default_master_connection = "a2b";
include/wait_for_slave_to_start.inc
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
10
11
12
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
Warnings:
Note 1938 SLAVE 'c2a' stopped
Note 1938 SLAVE 'b2a' stopped
include/reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
Warnings:
Note 1938 SLAVE 'a2b' stopped
Note 1938 SLAVE 'c2b' stopped
include/reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
Warnings:
Note 1938 SLAVE 'a2c' stopped
Note 1938 SLAVE 'b2c' stopped
include/reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
Warnings:
Note 1938 SLAVE 'a2d' stopped
include/reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
--source include/not_embedded.inc
--source include/have_innodb.inc
--echo *** Test all-to-all replication with --gtid-ignore-duplicates ***
--connect (server_1,127.0.0.1,root,,,$SERVER_MYPORT_1)
--connect (server_2,127.0.0.1,root,,,$SERVER_MYPORT_2)
--connect (server_3,127.0.0.1,root,,,$SERVER_MYPORT_3)
--connect (server_4,127.0.0.1,root,,,$SERVER_MYPORT_4)
# Setup A <-> B, B <-> C, C <-> A, and A -> D.
--connection server_1
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 1;
SET SESSION gtid_domain_id= 1;
--replace_result $SERVER_MYPORT_2 MYPORT_2
eval CHANGE MASTER 'b2a' TO master_port=$SERVER_MYPORT_2, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
--replace_result $SERVER_MYPORT_3 MYPORT_3
eval CHANGE MASTER 'c2a' TO master_port=$SERVER_MYPORT_3, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'b2a';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = 'c2a';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = '';
--connection server_2
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 2;
SET SESSION gtid_domain_id= 2;
--replace_result $SERVER_MYPORT_1 MYPORT_1
eval CHANGE MASTER 'a2b' TO master_port=$SERVER_MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
--replace_result $SERVER_MYPORT_3 MYPORT_3
eval CHANGE MASTER 'c2b' TO master_port=$SERVER_MYPORT_3, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2b';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = 'c2b';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = '';
--connection server_3
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 3;
SET SESSION gtid_domain_id= 3;
--replace_result $SERVER_MYPORT_1 MYPORT_1
eval CHANGE MASTER 'a2c' TO master_port=$SERVER_MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
--replace_result $SERVER_MYPORT_2 MYPORT_2
eval CHANGE MASTER 'b2c' TO master_port=$SERVER_MYPORT_2, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2c';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = 'b2c';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = '';
--connection server_4
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=5;
SET @old_ignore_duplicates= @@GLOBAL.gtid_ignore_duplicates;
SET GLOBAL gtid_ignore_duplicates=1;
SET GLOBAL gtid_domain_id= 1;
SET SESSION gtid_domain_id= 1;
--replace_result $SERVER_MYPORT_1 MYPORT_1
eval CHANGE MASTER 'a2d' TO master_port=$SERVER_MYPORT_1, master_host='127.0.0.1', master_user='root', master_use_gtid=slave_pos;
set default_master_connection = 'a2d';
START SLAVE;
--source include/wait_for_slave_to_start.inc
set default_master_connection = '';
--connection server_1
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
BEGIN;
INSERT INTO t1 VALUES (2);
INSERT INTO t1 VALUES (3);
COMMIT;
INSERT INTO t1 VALUES (4), (5);
INSERT INTO t1 VALUES (6);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
--connection server_3
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
--connection server_4
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 ORDER BY a;
# Test that we can connect at a GTID position that has not yet reached
# that master server.
# We stop the connections C->B and A->B, create an event on C, and check that
# the event has reached A (but not B). Then we let A stop and re-connect to
# B, which will connect at the new event, which is in the future for B.
--connection server_3
INSERT INTO t1 VALUES (10);
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
STOP SLAVE "c2b";
SET default_master_connection = "c2b";
--source include/wait_for_slave_to_stop.inc
STOP SLAVE "a2b";
SET default_master_connection = "a2b";
--source include/wait_for_slave_to_stop.inc
--connection server_3
INSERT INTO t1 VALUES (11);
--source include/save_master_gtid.inc
--connection server_1
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
SET default_master_connection = "b2a";
STOP SLAVE;
--source include/wait_for_slave_to_stop.inc
--connection server_2
INSERT INTO t1 VALUES (12);
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_1
START SLAVE "b2a";
SET default_master_connection = "b2a";
--source include/wait_for_slave_to_start.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
--connection server_2
START SLAVE "c2b";
SET default_master_connection = "c2b";
--source include/wait_for_slave_to_start.inc
START SLAVE "a2b";
SET default_master_connection = "a2b";
--source include/wait_for_slave_to_start.inc
--connection server_1
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
# Clean up.
--connection server_1
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
--source reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
--disconnect server_1
--connection server_2
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
--source reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
--disconnect server_2
--connection server_3
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
--source reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
--disconnect server_3
--connection server_4
SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES;
--source reset_master_slave.inc
SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
DROP TABLE t1;
--disconnect server_4
...@@ -4440,7 +4440,7 @@ Default database: '%s'. Query: '%s'", ...@@ -4440,7 +4440,7 @@ Default database: '%s'. Query: '%s'",
end: end:
if (sub_id && !thd->is_slave_error) if (sub_id && !thd->is_slave_error)
rpl_global_gtid_slave_state.update_state_hash(sub_id, &gtid); rpl_global_gtid_slave_state.update_state_hash(sub_id, &gtid, rli);
/* /*
Probably we have set thd->query, thd->db, thd->catalog to point to places Probably we have set thd->query, thd->db, thd->catalog to point to places
...@@ -6806,7 +6806,8 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) ...@@ -6806,7 +6806,8 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi)
sub_id_list[i], sub_id_list[i],
false, false))) false, false)))
return ret; return ret;
rpl_global_gtid_slave_state.update_state_hash(sub_id_list[i], &list[i]); rpl_global_gtid_slave_state.update_state_hash(sub_id_list[i], &list[i],
NULL);
} }
} }
ret= Log_event::do_apply_event(rgi); ret= Log_event::do_apply_event(rgi);
...@@ -7326,7 +7327,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) ...@@ -7326,7 +7327,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
thd->mdl_context.release_transactional_locks(); thd->mdl_context.release_transactional_locks();
if (!res && sub_id) if (!res && sub_id)
rpl_global_gtid_slave_state.update_state_hash(sub_id, &gtid); rpl_global_gtid_slave_state.update_state_hash(sub_id, &gtid, rli);
/* /*
Increment the global status commit count variable Increment the global status commit count variable
......
...@@ -553,6 +553,7 @@ ulong opt_slave_domain_parallel_threads= 0; ...@@ -553,6 +553,7 @@ ulong opt_slave_domain_parallel_threads= 0;
ulong opt_binlog_commit_wait_count= 0; ulong opt_binlog_commit_wait_count= 0;
ulong opt_binlog_commit_wait_usec= 0; ulong opt_binlog_commit_wait_usec= 0;
ulong opt_slave_parallel_max_queued= 131072; ulong opt_slave_parallel_max_queued= 131072;
my_bool opt_gtid_ignore_duplicates= FALSE;
const double log_10[] = { const double log_10[] = {
1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
...@@ -987,7 +988,7 @@ PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread, ...@@ -987,7 +988,7 @@ PSI_cond_key key_COND_rpl_thread_queue, key_COND_rpl_thread,
key_COND_rpl_thread_pool, key_COND_rpl_thread_pool,
key_COND_parallel_entry, key_COND_group_commit_orderer, key_COND_parallel_entry, key_COND_group_commit_orderer,
key_COND_prepare_ordered; key_COND_prepare_ordered;
PSI_cond_key key_COND_wait_gtid; PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;
static PSI_cond_info all_server_conds[]= static PSI_cond_info all_server_conds[]=
{ {
...@@ -1035,7 +1036,8 @@ static PSI_cond_info all_server_conds[]= ...@@ -1035,7 +1036,8 @@ static PSI_cond_info all_server_conds[]=
{ &key_COND_parallel_entry, "COND_parallel_entry", 0}, { &key_COND_parallel_entry, "COND_parallel_entry", 0},
{ &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0}, { &key_COND_group_commit_orderer, "COND_group_commit_orderer", 0},
{ &key_COND_prepare_ordered, "COND_prepare_ordered", 0}, { &key_COND_prepare_ordered, "COND_prepare_ordered", 0},
{ &key_COND_wait_gtid, "COND_wait_gtid", 0} { &key_COND_wait_gtid, "COND_wait_gtid", 0},
{ &key_COND_gtid_ignore_duplicates, "COND_gtid_ignore_duplicates", 0}
}; };
PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
......
...@@ -184,6 +184,7 @@ extern ulong opt_slave_domain_parallel_threads; ...@@ -184,6 +184,7 @@ extern ulong opt_slave_domain_parallel_threads;
extern ulong opt_slave_parallel_max_queued; extern ulong opt_slave_parallel_max_queued;
extern ulong opt_binlog_commit_wait_count; extern ulong opt_binlog_commit_wait_count;
extern ulong opt_binlog_commit_wait_usec; extern ulong opt_binlog_commit_wait_usec;
extern my_bool opt_gtid_ignore_duplicates;
extern ulong back_log; extern ulong back_log;
extern ulong executed_events; extern ulong executed_events;
extern char language[FN_REFLEN]; extern char language[FN_REFLEN];
...@@ -299,7 +300,7 @@ extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy; ...@@ -299,7 +300,7 @@ extern PSI_cond_key key_TC_LOG_MMAP_COND_queue_busy;
extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue, extern PSI_cond_key key_COND_rpl_thread, key_COND_rpl_thread_queue,
key_COND_rpl_thread_pool, key_COND_rpl_thread_pool,
key_COND_parallel_entry, key_COND_group_commit_orderer; key_COND_parallel_entry, key_COND_group_commit_orderer;
extern PSI_cond_key key_COND_wait_gtid; extern PSI_cond_key key_COND_wait_gtid, key_COND_gtid_ignore_duplicates;
extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert, extern PSI_thread_key key_thread_bootstrap, key_thread_delayed_insert,
key_thread_handle_manager, key_thread_kill_server, key_thread_main, key_thread_handle_manager, key_thread_kill_server, key_thread_main,
......
...@@ -33,7 +33,8 @@ const LEX_STRING rpl_gtid_slave_state_table_name= ...@@ -33,7 +33,8 @@ const LEX_STRING rpl_gtid_slave_state_table_name=
void void
rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid) rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid,
const Relay_log_info *rli)
{ {
int err; int err;
/* /*
...@@ -44,7 +45,7 @@ rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid) ...@@ -44,7 +45,7 @@ rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid)
it is even committed. it is even committed.
*/ */
mysql_mutex_lock(&LOCK_slave_state); mysql_mutex_lock(&LOCK_slave_state);
err= update(gtid->domain_id, gtid->server_id, sub_id, gtid->seq_no); err= update(gtid->domain_id, gtid->server_id, sub_id, gtid->seq_no, rli);
mysql_mutex_unlock(&LOCK_slave_state); mysql_mutex_unlock(&LOCK_slave_state);
if (err) if (err)
{ {
...@@ -76,17 +77,102 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) ...@@ -76,17 +77,102 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi)
rgi->gtid_sub_id= 0; rgi->gtid_sub_id= 0;
if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false))
DBUG_RETURN(1); DBUG_RETURN(1);
update_state_hash(sub_id, &rgi->current_gtid); update_state_hash(sub_id, &rgi->current_gtid, rgi->rli);
} }
DBUG_RETURN(0); DBUG_RETURN(0);
} }
/*
  Check GTID event execution when --gtid-ignore-duplicates.

  The idea with --gtid-ignore-duplicates is that we allow multiple master
  connections (in multi-source replication) to all receive the same GTIDs and
  event groups. Only one instance of each is applied; we use the sequence
  number in the GTID to decide whether a GTID has already been applied.

  So if the seq_no of a GTID (or a higher sequence number) has already been
  applied, then the event should be skipped. If not then the event should be
  applied.

  To avoid two master connections trying to apply the same event
  simultaneously, only one is allowed to work in any given domain at any point
  in time. The associated Relay_log_info object is called the owner of the
  domain (and there can be multiple parallel worker threads working in that
  domain for that Relay_log_info). Any other Relay_log_info/master connection
  must wait for the domain to become free, or for their GTID to have been
  applied, before being allowed to proceed.

  Returns:
    0  This GTID is already applied, it should be skipped.
    1  The GTID is not yet applied; this rli is now the owner, and must apply
       the event and release the domain afterwards.
   -1  Error (out of memory to allocate a new element for the domain).
*/
int
rpl_slave_state::check_duplicate_gtid(rpl_gtid *gtid, const Relay_log_info *rli)
{
  uint32 domain_id= gtid->domain_id;
  /*
    Sequence numbers are 64-bit (update() takes them as uint64). Keep the
    full width here; truncating to 32 bits would make the comparison against
    highest_seq_no wrong for large sequence numbers, causing not-yet-applied
    GTIDs to be skipped as duplicates.
  */
  uint64 seq_no= gtid->seq_no;
  rpl_slave_state::element *elem;
  int res;

  mysql_mutex_lock(&LOCK_slave_state);
  if (!(elem= get_element(domain_id)))
  {
    /* No element for this domain could be obtained; report the error. */
    res= -1;
    goto err;
  }
  /*
    Note that the elem pointer does not change once inserted in the hash. So
    we can re-use the pointer without looking it up again in the hash after
    each lock release and re-take.
  */

  /* ToDo: Make this wait killable. */
  for (;;)
  {
    if (elem->highest_seq_no >= seq_no)
    {
      /* This sequence number is already applied, ignore it. */
      res= 0;
      break;
    }
    if (!elem->owner_rli)
    {
      /* The domain became free, grab it and apply the event. */
      elem->owner_rli= rli;
      elem->owner_count= 1;
      res= 1;
      break;
    }
    if (elem->owner_rli == rli)
    {
      /* Already own this domain, increment reference count and apply event. */
      ++elem->owner_count;
      res= 1;
      break;
    }
    /*
      Someone else is currently processing this GTID (or an earlier one).
      Wait for them to complete (or fail), and then check again.
      The wait releases LOCK_slave_state while sleeping and re-takes it
      before returning, so all state is re-examined from the top of the loop.
    */
    mysql_cond_wait(&elem->COND_gtid_ignore_duplicates,
                    &LOCK_slave_state);
  }

err:
  mysql_mutex_unlock(&LOCK_slave_state);
  return res;
}
static void static void
rpl_slave_state_free_element(void *arg) rpl_slave_state_free_element(void *arg)
{ {
struct rpl_slave_state::element *elem= (struct rpl_slave_state::element *)arg; struct rpl_slave_state::element *elem= (struct rpl_slave_state::element *)arg;
mysql_cond_destroy(&elem->COND_wait_gtid); mysql_cond_destroy(&elem->COND_wait_gtid);
mysql_cond_destroy(&elem->COND_gtid_ignore_duplicates);
my_free(elem); my_free(elem);
} }
...@@ -147,7 +233,7 @@ rpl_slave_state::deinit() ...@@ -147,7 +233,7 @@ rpl_slave_state::deinit()
int int
rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
uint64 seq_no) uint64 seq_no, const Relay_log_info *rli)
{ {
element *elem= NULL; element *elem= NULL;
list_element *list_elem= NULL; list_element *list_elem= NULL;
...@@ -170,6 +256,20 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, ...@@ -170,6 +256,20 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
mysql_cond_broadcast(&elem->COND_wait_gtid); mysql_cond_broadcast(&elem->COND_wait_gtid);
} }
if (opt_gtid_ignore_duplicates && rli)
{
uint32 count= elem->owner_count;
DBUG_ASSERT(count > 0);
DBUG_ASSERT(elem->owner_rli == rli);
--count;
elem->owner_count= count;
if (count == 0)
{
elem->owner_rli= NULL;
mysql_cond_broadcast(&elem->COND_gtid_ignore_duplicates);
}
}
if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME)))) if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME))))
return 1; return 1;
list_elem->server_id= server_id; list_elem->server_id= server_id;
...@@ -199,7 +299,11 @@ rpl_slave_state::get_element(uint32 domain_id) ...@@ -199,7 +299,11 @@ rpl_slave_state::get_element(uint32 domain_id)
elem->domain_id= domain_id; elem->domain_id= domain_id;
elem->highest_seq_no= 0; elem->highest_seq_no= 0;
elem->gtid_waiter= NULL; elem->gtid_waiter= NULL;
elem->owner_rli= NULL;
elem->owner_count= 0;
mysql_cond_init(key_COND_wait_gtid, &elem->COND_wait_gtid, 0); mysql_cond_init(key_COND_wait_gtid, &elem->COND_wait_gtid, 0);
mysql_cond_init(key_COND_gtid_ignore_duplicates,
&elem->COND_gtid_ignore_duplicates, 0);
if (my_hash_insert(&hash, (uchar *)elem)) if (my_hash_insert(&hash, (uchar *)elem))
{ {
my_free(elem); my_free(elem);
...@@ -821,7 +925,7 @@ rpl_slave_state::load(THD *thd, char *state_from_master, size_t len, ...@@ -821,7 +925,7 @@ rpl_slave_state::load(THD *thd, char *state_from_master, size_t len,
if (gtid_parser_helper(&state_from_master, end, &gtid) || if (gtid_parser_helper(&state_from_master, end, &gtid) ||
!(sub_id= next_sub_id(gtid.domain_id)) || !(sub_id= next_sub_id(gtid.domain_id)) ||
record_gtid(thd, &gtid, sub_id, false, in_statement) || record_gtid(thd, &gtid, sub_id, false, in_statement) ||
update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no)) update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, NULL))
return 1; return 1;
if (state_from_master == end) if (state_from_master == end)
break; break;
......
...@@ -91,6 +91,8 @@ struct gtid_waiting { ...@@ -91,6 +91,8 @@ struct gtid_waiting {
}; };
class Relay_log_info;
/* /*
Replication slave state. Replication slave state.
...@@ -131,6 +133,19 @@ struct rpl_slave_state ...@@ -131,6 +133,19 @@ struct rpl_slave_state
uint64 min_wait_seq_no; uint64 min_wait_seq_no;
mysql_cond_t COND_wait_gtid; mysql_cond_t COND_wait_gtid;
/*
For --gtid-ignore-duplicates. The Relay_log_info that currently owns
this domain, and the number of worker threads that are active in it.
The idea is that only one of multiple master connections is allowed to
actively apply events for a given domain. Other connections must either
discard the events (if the seq_no in GTID shows they have already been
applied), or wait to see if the current owner will apply it.
*/
const Relay_log_info *owner_rli;
uint32 owner_count;
mysql_cond_t COND_gtid_ignore_duplicates;
list_element *grab_list() { list_element *l= list; list= NULL; return l; } list_element *grab_list() { list_element *l= list; list= NULL; return l; }
void add(list_element *l) void add(list_element *l)
{ {
...@@ -155,7 +170,8 @@ struct rpl_slave_state ...@@ -155,7 +170,8 @@ struct rpl_slave_state
void deinit(); void deinit();
void truncate_hash(); void truncate_hash();
ulong count() const { return hash.records; } ulong count() const { return hash.records; }
int update(uint32 domain_id, uint32 server_id, uint64 sub_id, uint64 seq_no); int update(uint32 domain_id, uint32 server_id, uint64 sub_id,
uint64 seq_no, const Relay_log_info *rli);
int truncate_state_table(THD *thd); int truncate_state_table(THD *thd);
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
bool in_transaction, bool in_statement); bool in_transaction, bool in_statement);
...@@ -171,8 +187,10 @@ struct rpl_slave_state ...@@ -171,8 +187,10 @@ struct rpl_slave_state
element *get_element(uint32 domain_id); element *get_element(uint32 domain_id);
int put_back_list(uint32 domain_id, list_element *list); int put_back_list(uint32 domain_id, list_element *list);
void update_state_hash(uint64 sub_id, rpl_gtid *gtid); void update_state_hash(uint64 sub_id, rpl_gtid *gtid,
const Relay_log_info *rli);
int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi); int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi);
int check_duplicate_gtid(rpl_gtid *gtid, const Relay_log_info *rli);
}; };
......
...@@ -202,7 +202,7 @@ handle_rpl_parallel_thread(void *arg) ...@@ -202,7 +202,7 @@ handle_rpl_parallel_thread(void *arg)
struct rpl_parallel_thread::queued_event *events; struct rpl_parallel_thread::queued_event *events;
bool group_standalone= true; bool group_standalone= true;
bool in_event_group= false; bool in_event_group= false;
bool group_skip_for_stop= false; bool skip_event_group= false;
rpl_group_info *group_rgi= NULL; rpl_group_info *group_rgi= NULL;
group_commit_orderer *gco, *tmp_gco; group_commit_orderer *gco, *tmp_gco;
uint64 event_gtid_sub_id= 0; uint64 event_gtid_sub_id= 0;
...@@ -385,13 +385,13 @@ handle_rpl_parallel_thread(void *arg) ...@@ -385,13 +385,13 @@ handle_rpl_parallel_thread(void *arg)
point where we can safely stop. So set a flag that will cause us point where we can safely stop. So set a flag that will cause us
to skip, rather than execute, the following events. to skip, rather than execute, the following events.
*/ */
group_skip_for_stop= true; skip_event_group= true;
} }
else else
group_skip_for_stop= false; skip_event_group= false;
if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id)) if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id))
group_skip_for_stop= true; skip_event_group= true;
else if (rgi->wait_commit_sub_id > entry->last_committed_sub_id) else if (rgi->wait_commit_sub_id > entry->last_committed_sub_id)
{ {
/* /*
...@@ -420,6 +420,16 @@ handle_rpl_parallel_thread(void *arg) ...@@ -420,6 +420,16 @@ handle_rpl_parallel_thread(void *arg)
thd->wait_for_commit_ptr->wakeup_subsequent_commits(err); thd->wait_for_commit_ptr->wakeup_subsequent_commits(err);
} }
thd->wait_for_commit_ptr= &rgi->commit_orderer; thd->wait_for_commit_ptr= &rgi->commit_orderer;
if (opt_gtid_ignore_duplicates)
{
int res=
rpl_global_gtid_slave_state.check_duplicate_gtid(&rgi->current_gtid,
rgi->rli);
/* ToDo: Handle res==-1 error. */
if (!res)
skip_event_group= true;
}
} }
group_ending= event_type == XID_EVENT || group_ending= event_type == XID_EVENT ||
...@@ -438,7 +448,7 @@ handle_rpl_parallel_thread(void *arg) ...@@ -438,7 +448,7 @@ handle_rpl_parallel_thread(void *arg)
processing between the event groups as a simple way to ensure that processing between the event groups as a simple way to ensure that
everything is stopped and cleaned up correctly. everything is stopped and cleaned up correctly.
*/ */
if (!rgi->is_error && !group_skip_for_stop) if (!rgi->is_error && !skip_event_group)
err= rpt_handle_event(events, rpt); err= rpt_handle_event(events, rpt);
else else
err= thd->wait_for_prior_commit(); err= thd->wait_for_prior_commit();
...@@ -464,7 +474,7 @@ handle_rpl_parallel_thread(void *arg) ...@@ -464,7 +474,7 @@ handle_rpl_parallel_thread(void *arg)
rgi->next= rgis_to_free; rgi->next= rgis_to_free;
rgis_to_free= rgi; rgis_to_free= rgi;
group_rgi= rgi= NULL; group_rgi= rgi= NULL;
group_skip_for_stop= false; skip_event_group= false;
DEBUG_SYNC(thd, "rpl_parallel_end_of_group"); DEBUG_SYNC(thd, "rpl_parallel_end_of_group");
} }
...@@ -526,7 +536,7 @@ handle_rpl_parallel_thread(void *arg) ...@@ -526,7 +536,7 @@ handle_rpl_parallel_thread(void *arg)
mysql_mutex_lock(&rpt->LOCK_rpl_thread); mysql_mutex_lock(&rpt->LOCK_rpl_thread);
rpt->free_rgi(group_rgi); rpt->free_rgi(group_rgi);
group_rgi= NULL; group_rgi= NULL;
group_skip_for_stop= false; skip_event_group= false;
} }
if (!in_event_group) if (!in_event_group)
{ {
......
...@@ -1435,7 +1435,8 @@ rpl_load_gtid_slave_state(THD *thd) ...@@ -1435,7 +1435,8 @@ rpl_load_gtid_slave_state(THD *thd)
if ((err= rpl_global_gtid_slave_state.update(tmp_entry.gtid.domain_id, if ((err= rpl_global_gtid_slave_state.update(tmp_entry.gtid.domain_id,
tmp_entry.gtid.server_id, tmp_entry.gtid.server_id,
tmp_entry.sub_id, tmp_entry.sub_id,
tmp_entry.gtid.seq_no))) tmp_entry.gtid.seq_no,
NULL)))
{ {
mysql_mutex_unlock(&rpl_global_gtid_slave_state.LOCK_slave_state); mysql_mutex_unlock(&rpl_global_gtid_slave_state.LOCK_slave_state);
my_error(ER_OUT_OF_RESOURCES, MYF(0)); my_error(ER_OUT_OF_RESOURCES, MYF(0));
......
...@@ -2047,6 +2047,39 @@ after_set_capability: ...@@ -2047,6 +2047,39 @@ after_set_capability:
} }
} }
query_str.length(0);
if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_ignore_duplicates="),
system_charset_info) ||
query_str.append_ulonglong(opt_gtid_ignore_duplicates != false))
{
err_code= ER_OUTOFMEMORY;
errmsg= "The slave I/O thread stops because a fatal out-of-memory error "
"is encountered when it tries to set @slave_gtid_ignore_duplicates.";
sprintf(err_buff, "%s Error: Out of memory", errmsg);
goto err;
}
rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
if (rc)
{
err_code= mysql_errno(mysql);
if (is_network_error(err_code))
{
mi->report(ERROR_LEVEL, err_code,
"Setting @slave_gtid_ignore_duplicates failed with "
"error: %s", mysql_error(mysql));
goto network_err;
}
else
{
/* Fatal error */
errmsg= "The slave I/O thread stops because a fatal error is "
"encountered when it tries to set @slave_gtid_ignore_duplicates.";
sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
goto err;
}
}
if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID) if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID)
{ {
query_str.length(0); query_str.length(0);
......
This diff is collapsed.
...@@ -1819,6 +1819,54 @@ static Sys_var_ulong Sys_slave_parallel_max_queued( ...@@ -1819,6 +1819,54 @@ static Sys_var_ulong Sys_slave_parallel_max_queued(
"--slave-parallel-threads > 0.", "--slave-parallel-threads > 0.",
GLOBAL_VAR(opt_slave_parallel_max_queued), CMD_LINE(REQUIRED_ARG), GLOBAL_VAR(opt_slave_parallel_max_queued), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0,2147483647), DEFAULT(131072), BLOCK_SIZE(1)); VALID_RANGE(0,2147483647), DEFAULT(131072), BLOCK_SIZE(1));
/*
  Check hook for @@gtid_ignore_duplicates (registered via ON_CHECK below).

  Queries master_info_index->give_error_if_slave_running() while holding
  LOCK_active_mi and returns its result unchanged. A true return is
  presumably treated by the sys_var framework as "reject the assignment";
  confirm against the ON_CHECK contract.
*/
static bool
check_gtid_ignore_duplicates(sys_var *self, THD *thd, set_var *var)
{
  mysql_mutex_lock(&LOCK_active_mi);
  const bool slave_is_running=
    master_info_index->give_error_if_slave_running();
  mysql_mutex_unlock(&LOCK_active_mi);

  return slave_is_running;
}
/*
  Update hook for @@gtid_ignore_duplicates (registered via ON_UPDATE below).

  Temporarily drops LOCK_global_system_variables, samples
  give_error_if_slave_running() under LOCK_active_mi, then re-takes
  LOCK_global_system_variables before returning. Returns true if a slave
  was reported as running, false otherwise.
*/
static bool
fix_gtid_ignore_duplicates(sys_var *self, THD *thd, enum_var_type type)
{
  bool slave_is_running;

  /* Release the global variables lock before taking LOCK_active_mi. */
  mysql_mutex_unlock(&LOCK_global_system_variables);

  mysql_mutex_lock(&LOCK_active_mi);
  slave_is_running= master_info_index->give_error_if_slave_running();
  mysql_mutex_unlock(&LOCK_active_mi);

  mysql_mutex_lock(&LOCK_global_system_variables);
  /*
    ToDo: Isn't there a race here? The variable should be changed only under
    LOCK_active_mi, and only if no slave is running.
  */
  return slave_is_running;
}
/*
  Definition of the global boolean system variable gtid_ignore_duplicates.

  The value is stored in opt_gtid_ignore_duplicates and defaults to FALSE.
  It can be given on the command line with an optional argument, is not
  written to the binlog, and uses check_gtid_ignore_duplicates() and
  fix_gtid_ignore_duplicates() as its check and update hooks.
*/
static Sys_var_mybool Sys_gtid_ignore_duplicates(
"gtid_ignore_duplicates",
"When set, different master connections in multi-source replication are "
"allowed to receive and process event groups with the same GTID (when "
"using GTID mode). Only one will be applied, any others will be "
"ignored. Within a given replication domain, just the sequence number "
"will be used to decide whether a given GTID has been already applied; "
"this means it is the responsibility of the user to ensure that GTID "
"sequence numbers are strictly increasing.",
GLOBAL_VAR(opt_gtid_ignore_duplicates), CMD_LINE(OPT_ARG),
DEFAULT(FALSE), NO_MUTEX_GUARD,
NOT_IN_BINLOG, ON_CHECK(check_gtid_ignore_duplicates),
ON_UPDATE(fix_gtid_ignore_duplicates));
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment