Commit 5aa0d185 authored by unknown

MDEV-4473: mysql_binlog_send() starts sending events from wrong GTID position...

MDEV-4473: mysql_binlog_send() starts sending events from wrong GTID position in some master failover scenarios

Suppose binlog file X has in its Gtid_list_event: 0-1-3,0-2-5, and suppose the
slave requests to start replicating after 0-1-3.

In this case the bug was that the master would start sending events from the
start of X. This is wrong, because 0-2-4 and 0-2-5 come after 0-1-3 in domain 0
and are contained in X-1 (the binlog file preceding X), so they are still
needed by the slave. These events were therefore lost.

On the other hand, if the slave requested 0-2-5, then it _is_ correct to start
sending from the beginning of binlog file X, because 0-2-5 is the last GTID
logged in earlier binlogs. The difference is that 0-2-5 is the last of the
GTIDs in the Gtid_list_event. The problem was that the code did not check that
the matched GTID was the last one in the list.

Fixed by checking whether the GTID requested by the slave, when it matches a
GTID in the Gtid_list_event, is the last GTID for that domain in the list. If
not, go back to a prior binlog to ensure all needed events are sent to the slave.

mysql-test/include/show_events.inc:
  Backport --let $binlog_file=LAST, used by MDEV-4473 test case.
parent eb75edfb
...@@ -18,7 +18,19 @@ if ($is_relay_log) ...@@ -18,7 +18,19 @@ if ($is_relay_log)
if ($binlog_file) if ($binlog_file)
{ {
--let $_statement= $_statement in '$binlog_file' --let $_binlog_file= $binlog_file
if ($_binlog_file == 'LAST')
{
if ($is_relay_log)
{
--let $_binlog_file= query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1)
}
if (!$is_relay_log)
{
--let $_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
}
}
--let $_statement= $_statement in '$_binlog_file'
} }
if (!$binlog_start) if (!$binlog_start)
......
include/rpl_init.inc [topology=1->2,1->3]
create table t1 (n int);
insert into t1 values (1);
insert into t1 values (2);
include/stop_slave.inc
include/wait_for_slave_to_stop.inc
include/stop_slave.inc
include/wait_for_slave_to_stop.inc
reset slave all;
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_2,
master_user='root', MASTER_USE_GTID=1;
include/start_slave.inc
include/wait_for_slave_to_start.inc
flush logs;
insert into t1 values (3);
insert into t1 values (4);
flush logs;
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_2,
MASTER_USE_GTID=1;
include/start_slave.inc
select * from t1 order by n;
n
1
2
3
4
show binary logs;
Log_name File_size
master-bin.000001 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; create table t1 (n int)
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; insert into t1 values (1)
master-bin.000001 # Query # # COMMIT
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; insert into t1 values (2)
master-bin.000001 # Query # # COMMIT
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; insert into t1 values (3)
master-bin.000001 # Query # # COMMIT
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; insert into t1 values (4)
master-bin.000001 # Query # # COMMIT
include/stop_slave.inc
include/wait_for_slave_to_stop.inc
reset slave all;
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_1,
master_user = 'root', MASTER_USE_GTID=1;
include/start_slave.inc
include/stop_slave.inc
CHANGE MASTER TO master_host = '127.0.0.1', master_port = SERVER_MYPORT_1,
MASTER_USE_GTID=1;
include/start_slave.inc
drop table t1;
include/rpl_end.inc
# Server configuration for the three-server replication test below.
# Start from the settings in the parent directory's my.cnf.
!include ../my.cnf
# Each of the three servers sets log-slave-updates and loose-innodb.
# NOTE(review): log-slave-updates is presumably required so that replicated
# events show up in each slave's own binlog (the test lists server_3's binlog
# events) -- confirm.
[mysqld.1]
log-slave-updates
loose-innodb
[mysqld.2]
log-slave-updates
loose-innodb
[mysqld.3]
log-slave-updates
loose-innodb
# Port and socket values for the third server, taken from its [mysqld.3]
# settings.
[ENV]
SERVER_MYPORT_3= @mysqld.3.port
SERVER_MYSOCK_3= @mysqld.3.socket
# Test case for MDEV-4473: after a master failover, a slave that connects
# with GTID must be sent events starting from the correct binlog position
# on the new master, so that no events are lost.
--source include/have_innodb.inc
--source include/have_binlog_format_mixed.inc
# Initial topology: server_1 is the master of both server_2 and server_3.
--let $rpl_topology=1->2,1->3
--source include/rpl_init.inc
# Create the table and two rows on the original master.
connection server_1;
create table t1 (n int);
insert into t1 values (1);
insert into t1 values (2);
save_master_pos;
# Let server_3 catch up with server_1, then stop its slave threads. From here
# on server_3 has rows 1 and 2 only.
connection server_3;
sync_with_master;
source include/stop_slave.inc;
source include/wait_for_slave_to_stop.inc;
# Let server_2 catch up as well, then stop it and remove its slave
# configuration so that it can act as the new master.
connection server_2;
sync_with_master;
source include/stop_slave.inc;
source include/wait_for_slave_to_stop.inc;
reset slave all;
# Failover: make server_1 a GTID slave of server_2. The port number is masked
# in the recorded output.
connection server_1;
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_2,
     master_user='root', MASTER_USE_GTID=1;
source include/start_slave.inc;
source include/wait_for_slave_to_start.inc;
# Add two more rows on the new master, with a "flush logs" before and after.
# NOTE(review): the flushes presumably rotate the binlog so that the position
# server_3 requests lies in an earlier binlog file than the latest one, which
# is the scenario described in MDEV-4473 -- confirm.
connection server_2;
flush logs;
insert into t1 values (3);
insert into t1 values (4);
flush logs;
save_master_pos;
# Re-point server_3 at server_2 using GTID and wait for it to catch up.
# The select is expected to show all four rows, i.e. rows 3 and 4 were not
# skipped by the new master.
connection server_3;
--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_2,
     MASTER_USE_GTID=1;
source include/start_slave.inc;
sync_with_master;
select * from t1 order by n;
# Record server_3's binary logs and the events in its most recent binlog file
# ($binlog_file=LAST selects the current binlog file).
source include/show_binary_logs.inc;
let $binlog_file=LAST;
source include/show_binlog_events.inc;
# Cleanup: restore the original topology. First remove the slave
# configuration from server_1 so it is a plain master again.
connection server_1;
source include/stop_slave.inc;
source include/wait_for_slave_to_stop.inc;
reset slave all;
# Make server_2 a GTID slave of server_1 again.
connection server_2;
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_1,
     master_user = 'root', MASTER_USE_GTID=1;
source include/start_slave.inc;
# Point server_3 back at server_1 as well.
connection server_3;
source include/stop_slave.inc;
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $SERVER_MYPORT_1,
     MASTER_USE_GTID=1;
source include/start_slave.inc;
# Drop the test table on the master and finish the replication test.
connection server_1;
drop table t1;
--source include/rpl_end.inc
...@@ -703,7 +703,12 @@ get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list) ...@@ -703,7 +703,12 @@ get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
to build an in-memory hash or stuff like that. to build an in-memory hash or stuff like that.
We need to check that slave did not request GTID D-S-N1, when the We need to check that slave did not request GTID D-S-N1, when the
Gtid_list_log_event for this binlog file has D-S-N2 with N2 > N1. Gtid_list_log_event for this binlog file has D-S-N2 with N2 >= N1.
(Because this means that requested GTID is in an earlier binlog).
However, if the Gtid_list_log_event indicates that D-S-N1 is the very last
GTID for domain D in prior binlog files, then it is ok to start from the
very start of this binlog file. This special case is important, as it
allows to purge old logs even if some domain is unused for long.
In addition, we need to check that we do not have a GTID D-S-N3 in the In addition, we need to check that we do not have a GTID D-S-N3 in the
Gtid_list_log_event where D is not present in the requested slave state at Gtid_list_log_event where D is not present in the requested slave state at
...@@ -717,7 +722,8 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) ...@@ -717,7 +722,8 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev)
for (i= 0; i < glev->count; ++i) for (i= 0; i < glev->count; ++i)
{ {
const rpl_gtid *gtid= st->find(glev->list[i].domain_id); uint32 gl_domain_id= glev->list[i].domain_id;
const rpl_gtid *gtid= st->find(gl_domain_id);
if (!gtid) if (!gtid)
{ {
/* /*
...@@ -727,13 +733,28 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) ...@@ -727,13 +733,28 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev)
return false; return false;
} }
if (gtid->server_id == glev->list[i].server_id && if (gtid->server_id == glev->list[i].server_id &&
gtid->seq_no < glev->list[i].seq_no) gtid->seq_no <= glev->list[i].seq_no)
{ {
/* /*
The slave needs to receive gtid, but it is contained in an earlier The slave needs to start after gtid, but it is contained in an earlier
binlog file. So we need to search back further. binlog file. So we need to search back further, unless it was the very
last gtid logged for the domain in earlier binlog files.
*/ */
return false; if (gtid->seq_no < glev->list[i].seq_no)
return false;
/*
The slave requested D-S-N1, which happens to be the last GTID logged
in prior binlog files with same domain id D and server id S.
The Gtid_list is kept sorted on domain_id, with the last GTID in each
domain_id group being the last one logged. So if this is the last GTID
within the domain_id group, then it is ok to start from the very
beginning of this group, per the special case explained in comment at
the start of this function. If not, then we need to search back further.
*/
if (i+1 < glev->count && gl_domain_id == glev->list[i+1].domain_id)
return false;
} }
} }
...@@ -997,7 +1018,15 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name) ...@@ -997,7 +1018,15 @@ gtid_find_binlog_file(slave_connection_state *state, char *out_name)
const rpl_gtid *gtid= state->find(glev->list[i].domain_id); const rpl_gtid *gtid= state->find(glev->list[i].domain_id);
if (!gtid) if (!gtid)
{ {
/* contains_all_slave_gtid() would have returned false if so. */ /*
contains_all_slave_gtid() returns false if there is any domain in
Gtid_list_event which is not in the requested slave position.
We may delete a domain from the slave state inside this loop, but
we only do this when it is the very last GTID logged for that
domain in earlier binlogs, and then we can not encounter it in any
further GTIDs in the Gtid_list.
*/
DBUG_ASSERT(0); DBUG_ASSERT(0);
continue; continue;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment