Commit 9150a0c7 authored by Kristian Nielsen

MDEV-4937: sql_slave_skip_counter does not work with GTID

The sql_slave_skip_counter is important to be able to recover replication from
certain errors. Often, an appropriate solution is to set
sql_slave_skip_counter to skip over a problem event. But setting
sql_slave_skip_counter produced an error in GTID mode, with a suggestion to
instead set @@gtid_slave_pos to point past the problem event. This however is
not always possible; for example, in case of an INCIDENT event, that event
does not have any GTID to assign to @@gtid_slave_pos.

With this patch, sql_slave_skip_counter now works in GTID mode the same way as
in non-GTID mode. When set, that many initial events are skipped when the SQL
thread starts, plus as many extra events as are needed to completely skip any
partially skipped event group. The GTID position is updated to point past the
skipped event(s).
parent a985ac3a
......@@ -61,13 +61,7 @@ include/stop_slave.inc
INSERT INTO t1 VALUES (5, "m1a");
INSERT INTO t2 VALUES (5, "i1a");
CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT,
MASTER_USE_GTID=SLAVE_POS;
SET GLOBAL sql_slave_skip_counter=1;
ERROR HY000: When using GTID, @@sql_slave_skip_counter can not be used. Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position.
CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT,
MASTER_USE_GTID=CURRENT_POS;
SET GLOBAL sql_slave_skip_counter=10;
ERROR HY000: When using GTID, @@sql_slave_skip_counter can not be used. Instead, setting @@gtid_slave_pos explicitly can be used to skip to after a given GTID position.
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a b
......@@ -322,5 +316,108 @@ master_gtid_wait('2-1-10')
0
master_gtid_wait('2-1-10')
0
*** Test sql_gtid_slave_pos when used with GTID ***
include/stop_slave.inc
SET gtid_domain_id=2;
SET gtid_seq_no=1000;
INSERT INTO t1 VALUES (10);
INSERT INTO t1 VALUES (11);
SET sql_slave_skip_counter= 1;
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
11
SELECT IF(LOCATE("2-1-1001", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1001 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
include/stop_slave.inc
SET gtid_domain_id=2;
SET gtid_seq_no=1010;
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (13);
SET sql_slave_skip_counter= 2;
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
11
13
SELECT IF(LOCATE("2-1-1011", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1011 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
include/stop_slave.inc
SET gtid_domain_id=2;
SET gtid_seq_no=1020;
INSERT INTO t1 VALUES (14);
INSERT INTO t1 VALUES (15);
INSERT INTO t1 VALUES (16);
SET sql_slave_skip_counter= 3;
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
11
13
15
16
SELECT IF(LOCATE("2-1-1022", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1022 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
include/stop_slave.inc
SET gtid_domain_id=2;
SET gtid_seq_no=1030;
INSERT INTO t1 VALUES (17);
INSERT INTO t1 VALUES (18);
INSERT INTO t1 VALUES (19);
SET sql_slave_skip_counter= 5;
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a
11
13
15
16
19
SELECT IF(LOCATE("2-1-1032", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1032 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
include/stop_slave.inc
SET gtid_domain_id=3;
SET gtid_seq_no=100;
CREATE TABLE t2 (a INT PRIMARY KEY);
DROP TABLE t2;
SET gtid_domain_id=2;
SET gtid_seq_no=1040;
INSERT INTO t1 VALUES (20);
SET @saved_mode= @@GLOBAL.slave_ddl_exec_mode;
SET GLOBAL slave_ddl_exec_mode=STRICT;
SET sql_slave_skip_counter=1;
START SLAVE UNTIL master_gtid_pos="3-1-100";
include/sync_with_master_gtid.inc
include/wait_for_slave_sql_to_stop.inc
SELECT * FROM t2;
ERROR 42S02: Table 'test.t2' doesn't exist
SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
SET sql_log_bin=0;
CALL mtr.add_suppression("Slave: Unknown table 'test\\.t2' Error_code: 1051");
SET sql_log_bin=1;
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=1051]
SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
STOP SLAVE IO_THREAD;
SET sql_slave_skip_counter=2;
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
a
20
SELECT IF(LOCATE("3-1-101", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-101 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
SELECT IF(LOCATE("2-1-1040", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1040 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
status
Ok
SET GLOBAL slave_ddl_exec_mode= @saved_mode;
DROP TABLE t1;
include/rpl_end.inc
......@@ -29,12 +29,8 @@ REPLACE INTO t1 VALUES (5);
SET debug_dbug= @dbug_save;
include/wait_for_slave_sql_error.inc [errno=1590]
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=no;
SET sql_slave_skip_counter=1;
include/start_slave.inc
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=slave_pos;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
......
......@@ -68,16 +68,8 @@ save_master_pos;
connection server_4;
--replace_result $MASTER_MYPORT MASTER_PORT
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT,
MASTER_USE_GTID=SLAVE_POS;
# Test that sql_slave_skip_counter is prevented in GTID mode.
--error ER_SLAVE_SKIP_NOT_IN_GTID
SET GLOBAL sql_slave_skip_counter=1;
--replace_result $MASTER_MYPORT MASTER_PORT
eval CHANGE MASTER TO master_host = '127.0.0.1', master_port = $MASTER_MYPORT,
MASTER_USE_GTID=CURRENT_POS;
--error ER_SLAVE_SKIP_NOT_IN_GTID
SET GLOBAL sql_slave_skip_counter=10;
--source include/start_slave.inc
sync_with_master;
SELECT * FROM t1 ORDER BY a;
......@@ -374,6 +366,120 @@ reap;
reap;
--echo *** Test sql_gtid_slave_pos when used with GTID ***
--connection server_2
--source include/stop_slave.inc
--connection server_1
SET gtid_domain_id=2;
SET gtid_seq_no=1000;
INSERT INTO t1 VALUES (10);
INSERT INTO t1 VALUES (11);
--save_master_pos
--connection server_2
SET sql_slave_skip_counter= 1;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
SELECT IF(LOCATE("2-1-1001", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1001 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
--source include/stop_slave.inc
--connection server_1
SET gtid_domain_id=2;
SET gtid_seq_no=1010;
INSERT INTO t1 VALUES (12);
INSERT INTO t1 VALUES (13);
--save_master_pos
--connection server_2
SET sql_slave_skip_counter= 2;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
SELECT IF(LOCATE("2-1-1011", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1011 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
--source include/stop_slave.inc
--connection server_1
SET gtid_domain_id=2;
SET gtid_seq_no=1020;
INSERT INTO t1 VALUES (14);
INSERT INTO t1 VALUES (15);
INSERT INTO t1 VALUES (16);
--save_master_pos
--connection server_2
SET sql_slave_skip_counter= 3;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
SELECT IF(LOCATE("2-1-1022", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1022 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
--source include/stop_slave.inc
--connection server_1
SET gtid_domain_id=2;
SET gtid_seq_no=1030;
INSERT INTO t1 VALUES (17);
INSERT INTO t1 VALUES (18);
INSERT INTO t1 VALUES (19);
--save_master_pos
--connection server_2
SET sql_slave_skip_counter= 5;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
SELECT IF(LOCATE("2-1-1032", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1032 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
--source include/stop_slave.inc
--connection server_1
SET gtid_domain_id=3;
SET gtid_seq_no=100;
CREATE TABLE t2 (a INT PRIMARY KEY);
DROP TABLE t2;
SET gtid_domain_id=2;
SET gtid_seq_no=1040;
INSERT INTO t1 VALUES (20);
--save_master_pos
--connection server_2
SET @saved_mode= @@GLOBAL.slave_ddl_exec_mode;
SET GLOBAL slave_ddl_exec_mode=STRICT;
SET sql_slave_skip_counter=1;
START SLAVE UNTIL master_gtid_pos="3-1-100";
--let $master_pos=3-1-100
--source include/sync_with_master_gtid.inc
--source include/wait_for_slave_sql_to_stop.inc
--error ER_NO_SUCH_TABLE
SELECT * FROM t2;
SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
# Start the slave again, it should fail on the DROP TABLE as the table is not there.
SET sql_log_bin=0;
CALL mtr.add_suppression("Slave: Unknown table 'test\\.t2' Error_code: 1051");
SET sql_log_bin=1;
START SLAVE;
--let $slave_sql_errno=1051
--source include/wait_for_slave_sql_error.inc
SELECT IF(LOCATE("3-1-100", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-100 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
STOP SLAVE IO_THREAD;
SET sql_slave_skip_counter=2;
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
SELECT IF(LOCATE("3-1-101", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 3-1-101 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
SELECT IF(LOCATE("2-1-1040", @@GLOBAL.gtid_slave_pos)>0, "Ok", CONCAT("ERROR! expected GTID 2-1-1040 not found in gtid_slave_pos: ", @@GLOBAL.gtid_slave_pos)) AS status;
SET GLOBAL slave_ddl_exec_mode= @saved_mode;
--connection server_1
DROP TABLE t1;
......
......@@ -46,15 +46,10 @@ SET debug_dbug= @dbug_save;
--connection slave
--let $slave_sql_errno=1590
--source include/wait_for_slave_sql_error.inc
# ToDo no need to switch off GTID once MDEV-4937 is fixed
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=no;
SET sql_slave_skip_counter=1;
--source include/start_slave.inc
--sync_with_master
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=slave_pos;
--source include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
......
......@@ -1084,7 +1084,6 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
rli->slave_skip_counter=0;
mysql_mutex_lock(&rli->data_lock);
/*
......@@ -1565,8 +1564,8 @@ event_group_new_gtid(rpl_group_info *rgi, Gtid_log_event *gev)
return 1;
}
rgi->gtid_sub_id= sub_id;
rgi->current_gtid.server_id= gev->server_id;
rgi->current_gtid.domain_id= gev->domain_id;
rgi->current_gtid.server_id= gev->server_id;
rgi->current_gtid.seq_no= gev->seq_no;
return 0;
}
......
......@@ -3521,9 +3521,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
if (opt_gtid_ignore_duplicates)
{
serial_rgi->current_gtid.domain_id= gev->domain_id;
serial_rgi->current_gtid.server_id= gev->server_id;
serial_rgi->current_gtid.seq_no= gev->seq_no;
int res= rpl_global_gtid_slave_state.check_duplicate_gtid
(&serial_rgi->current_gtid, serial_rgi);
if (res < 0)
......@@ -4366,6 +4363,7 @@ pthread_handler_t handle_slave_sql(void *arg)
char saved_master_log_name[FN_REFLEN];
my_off_t UNINIT_VAR(saved_log_pos);
my_off_t UNINIT_VAR(saved_master_log_pos);
String saved_skip_gtid_pos;
my_off_t saved_skip= 0;
Master_info *mi= ((Master_info*)arg);
Relay_log_info* rli = &mi->rli;
......@@ -4571,6 +4569,12 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME,
strmake_buf(saved_master_log_name, rli->group_master_log_name);
saved_log_pos= rli->group_relay_log_pos;
saved_master_log_pos= rli->group_master_log_pos;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '"));
rpl_append_gtid_state(&saved_skip_gtid_pos, false);
saved_skip_gtid_pos.append(STRING_WITH_LEN("'; "));
}
saved_skip= rli->slave_skip_counter;
}
if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
......@@ -4594,16 +4598,27 @@ log '%s' at position %s, relay log '%s' position: %s%s", RPL_LOG_NAME,
if (saved_skip && rli->slave_skip_counter == 0)
{
String tmp;
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
tmp.append(STRING_WITH_LEN(", GTID '"));
rpl_append_gtid_state(&tmp, false);
tmp.append(STRING_WITH_LEN("'; "));
}
sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
"relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
"master_log_pos='%ld' and new position at "
"master_log_pos='%ld'%s and new position at "
"relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
"master_log_pos='%ld' ",
"master_log_pos='%ld'%s ",
(ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
saved_master_log_name, (ulong) saved_master_log_pos,
saved_skip_gtid_pos.c_ptr_safe(),
rli->group_relay_log_name, (ulong) rli->group_relay_log_pos,
rli->group_master_log_name, (ulong) rli->group_master_log_pos);
rli->group_master_log_name, (ulong) rli->group_master_log_pos,
tmp.c_ptr_safe());
saved_skip= 0;
saved_skip_gtid_pos.free();
}
if (exec_relay_log_event(thd, rli, serial_rgi))
......
......@@ -3075,6 +3075,7 @@ int reset_slave(THD *thd, Master_info* mi)
mi->clear_error();
mi->rli.clear_error();
mi->rli.clear_until_condition();
mi->rli.slave_skip_counter= 0;
// close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
end_master_info(mi);
......@@ -3522,6 +3523,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
/* Clear the errors, for a clean start */
mi->rli.clear_error();
mi->rli.clear_until_condition();
mi->rli.slave_skip_counter= 0;
sql_print_information("'CHANGE MASTER TO executed'. "
"Previous state master_host='%s', master_port='%u', master_log_file='%s', "
......
......@@ -4287,11 +4287,6 @@ bool update_multi_source_variable(sys_var *self_var, THD *thd,
static bool update_slave_skip_counter(sys_var *self, THD *thd, Master_info *mi)
{
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
my_error(ER_SLAVE_SKIP_NOT_IN_GTID, MYF(0));
return true;
}
if (mi->rli.slave_running)
{
my_error(ER_SLAVE_MUST_STOP, MYF(0), mi->connection_name.length,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment