Commit 5cb486d1 authored by unknown's avatar unknown

MDEV-26: Global transaction ID.

Fix problems related to reconnect. When we need to reconnect (i.e. explicit
stop/start of just the IO thread by user, or automatic reconnect due to
losing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.

With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.

(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
parent 7ad47ab0
# ==== Purpose ====
#
# Terminate all binlog dump threads on a master.
#
# This is sometimes useful, as normally such dump threads can hang
# around for some time before they notice that the slave has disconnected.
#
# Note that if there are active slave connections, they might try to
# reconnect as their dump threads are killed, which may not lead to the
# desired results.
#
#
# ==== Usage ====
#
# [--let $kill_timeout= NUMBER]
# --source include/kill_binlog_dump_threads.inc
#
# Parameters:
# $kill_timeout
# Maximum number of seconds to wait for dump threads to disappear.
# When unset, the script polls up to 300 times, sleeping 0.1 seconds
# between polls.
--let $include_filename= kill_binlog_dump_threads.inc
--source include/begin_include_file.inc

# Keep the polling queries and KILL statements out of the caller's query log.
--disable_query_log

# Poll budget: 300 iterations unless $kill_timeout is set, in which case ten
# iterations are allowed per requested second.
--let $wait_counter= 300
if ($kill_timeout)
{
  --let $wait_counter= `SELECT $kill_timeout * 10`
}

--let $success= 0
while ($wait_counter)
{
  --dec $wait_counter
  # Pick one remaining 'Binlog Dump' connection; empty when none is left.
  --let $_tid= `SELECT id FROM information_schema.processlist WHERE command = 'Binlog Dump' LIMIT 1`
  if (!$_tid)
  {
    # Nothing left to kill: end the loop and remember that we succeeded.
    --let $wait_counter= 0
    --let $success= 1
  }
  if ($_tid)
  {
    --eval KILL QUERY $_tid
    # Pause before polling again.
    --real_sleep 0.1
  }
}

# The poll budget ran out while a dump thread was still listed.
if (!$success)
{
  SHOW FULL PROCESSLIST;
  --die Timeout while waiting for binlog dump threads to disappear.
}
--enable_query_log

--let $include_filename= kill_binlog_dump_threads.inc
--source include/end_include_file.inc
include/rpl_init.inc [topology=1->2]
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid= current_pos;
include/start_slave.inc
CREATE TABLE t1 (a INT);
FLUSH LOGS;
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
5
10
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
a
1
11
12
13
include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
11
12
13
20
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
a
1
11
12
13
SET GLOBAL debug_dbug= @old_debug;
INSERT INTO t1 VALUES (20);
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a
1
2
3
11
12
13
20
*** Test when slave IO thread needs to reconnect in the middle of an event group. ***
include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1);
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
include/start_slave.inc
SELECT * FROM t2 ORDER BY a;
a
1
10
11
20
21
22
23
24
25
26
27
28
29
SET GLOBAL debug_dbug= @old_debug;
DROP TABLE t1, t2;
include/rpl_end.inc
# Test setup: two servers, with server_2 replicating from server_1.
--let $rpl_topology=1->2
--source include/rpl_init.inc
# InnoDB is needed for table t2 below; a debug build is needed for the
# debug_dbug injection points used to force reconnects.
--source include/have_innodb.inc
--source include/have_debug.inc
# Switch the slave to GTID-based replication.
--connection server_2
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid= current_pos;
--source include/start_slave.inc
# Create the test table on the master, rotate its binlog, and let the slave
# catch up before the actual test scenarios start.
--connection server_1
CREATE TABLE t1 (a INT);
FLUSH LOGS;
--save_master_pos
--connection server_2
--sync_with_master
# Prepare a string of events and have the slave replicate all of it.
# All five rows are logged in domain 10; the sequence number is forced to 100
# for the third insert, so that event gets GTID 10-1-100, which the reconnect
# injection in the next scenario refers to.
--connection server_1
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
--save_master_pos
# Wait for the slave to apply everything, then stop it with all five rows
# present.
--connection server_2
--sync_with_master
--source include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;
# Now start the slave again, but force a reconnect. There was a bug that this
# reconnect would cause duplicate events.
--connection server_1
# Make sure to get rid of any old binlog dump thread so it does not
# interfere with our DBUG error injection.
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
SET @old_dbug= @@GLOBAL.debug_dbug;
# Every statement in this test that restores debug_dbug reads @old_debug,
# while the save above writes @old_dbug. Without the alias below those
# restores would read a user variable that was never assigned, and the saved
# value would never be put back. The alias is set with the query log disabled
# so that the recorded result file does not change. (Renaming the variable in
# the statement above would be cleaner, but requires re-recording the result.)
--disable_query_log
SET @old_debug= @old_dbug;
--enable_query_log
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
# Each row must appear exactly once, including the new row 10.
SELECT * FROM t1 ORDER BY a;
# Reset both servers to an empty table, empty binlog and empty GTID position
# before the next scenario.
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
# A1 B1 A2 B2 A3 B3, slave reached A1 and B3 and stopped. Slave starts,
# reconnects at A2. There was a bug that B2 would be duplicated.
# Here "A" is domain 10 and "B" is domain 11: A1 is 10-1-50, A2 is 10-1-100
# and B3 is 11-1-200; the other events take the next sequence number in their
# domain.
SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);
# Run the slave only up to A1 in domain 10 and B3 in domain 11; it then has
# row 1 and rows 11, 12, 13.
--connection server_2
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
# Drop the old dump thread, add one more row, and arm the forced reconnect at
# A2 (10-1-100).
--connection server_1
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
# Resume without UNTIL; after catching up every row must be present exactly
# once.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
# Reset both servers before the next scenario.
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
# A1 B1 A2 B2 A3 B3. START SLAVE UNTIL A1,B3, gets reconnect at B2.
# There was a bug that the UNTIL would be ignored, and A2 would be lost.
# Here "A" is domain 9 and "B" is domain 10: A1 is 9-1-50, B2 is 10-1-100 and
# B3 is 10-1-200; the other events take the next sequence number in their
# domain.
--source include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
# The slave must honour the UNTIL position even though it is forced to
# reconnect at B2 on the way: it stops with row 1 and rows 11, 12, 13.
--connection server_2
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
INSERT INTO t1 VALUES (20);
--save_master_pos
--connection server_2
--source include/start_slave.inc
# Wait until the slave has applied everything up to the position saved above.
# Without this wait the SELECT races with the slave threads and may run
# before rows 2, 3 and 20 have been applied.
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--echo *** Test when slave IO thread needs to reconnect in the middle of an event group. ***
# Reset both servers, then build a binlog in which the injected reconnect
# falls inside the final multi-statement transaction.
--connection server_2
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
RESET MASTER;
--source include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";
# 4 events for FD, fake rotate, gtid list, binlog checkpoint.
# 2 events for GTID, create table
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
# 3 events for BEGIN/query/COMMIT
INSERT INTO t2 VALUES (1);
# 4 events for BEGIN/query/query/COMMIT
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
# So this event group starts after 4+2+4+3=13 events. Or 16 in row-based.
# With ten inserts in the group, a reconnect after 22 events therefore lands
# inside it.
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
--save_master_pos
# After the slave catches up, every row must be present exactly once.
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
# Clean up.
--connection server_1
DROP TABLE t1, t2;
--source include/rpl_end.inc
...@@ -482,26 +482,10 @@ rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid, bool *first) ...@@ -482,26 +482,10 @@ rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid, bool *first)
} }
/*
Prepare the current slave state as a string, suitable for sending to the
master to request to receive binlog events starting from that GTID state.
The state consists of the most recently applied GTID for each domain_id,
ie. the one with the highest sub_id within each domain_id.
Optinally, extra_gtids is a list of GTIDs from the binlog. This is used when
a server was previously a master and now needs to connect to a new master as
a slave. For each domain_id, if the GTID in the binlog was logged with our
own server_id _and_ has a higher seq_no than what is in the slave state,
then this should be used as the position to start replicating at. This
allows to promote a slave as new master, and connect the old master as a
slave with MASTER_GTID_POS=AUTO.
*/
int int
rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra) rpl_slave_state::iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra)
{ {
bool first= true;
uint32 i; uint32 i;
HASH gtid_hash; HASH gtid_hash;
uchar *rec; uchar *rec;
...@@ -555,7 +539,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra) ...@@ -555,7 +539,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
} }
} }
if (rpl_slave_state_tostring_helper(dest, &best_gtid, &first)) if ((res= (*cb)(&best_gtid, data)))
{ {
unlock(); unlock();
goto err; goto err;
...@@ -568,7 +552,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra) ...@@ -568,7 +552,7 @@ rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
for (i= 0; i < gtid_hash.records; ++i) for (i= 0; i < gtid_hash.records; ++i)
{ {
gtid= (rpl_gtid *)my_hash_element(&gtid_hash, i); gtid= (rpl_gtid *)my_hash_element(&gtid_hash, i);
if (rpl_slave_state_tostring_helper(dest, gtid, &first)) if ((res= (*cb)(gtid, data)))
goto err; goto err;
} }
...@@ -581,6 +565,44 @@ err: ...@@ -581,6 +565,44 @@ err:
} }
/* State carried through rpl_slave_state::iterate() while building a string. */
struct rpl_slave_state_tostring_data {
  String *dest;   /* String that the helper is given to write into. */
  bool first;     /* Flag handed to the helper by address on every call. */
};


/*
  Callback for rpl_slave_state::iterate(): forwards one GTID, together with
  the destination string and the "first" flag from the callback data, to
  rpl_slave_state_tostring_helper() and returns the helper's result.
*/
static int
rpl_slave_state_tostring_cb(rpl_gtid *gtid, void *data)
{
  rpl_slave_state_tostring_data *ctx=
    static_cast<rpl_slave_state_tostring_data *>(data);
  String *out= ctx->dest;
  bool *first_flag= &ctx->first;
  return rpl_slave_state_tostring_helper(out, gtid, first_flag);
}
/*
  Render the current slave state as a string that can be sent to the master
  to request binlog events starting from that GTID state.

  The state is the most recently applied GTID in each domain_id, i.e. the
  one with the highest sub_id within that domain.

  extra_gtids is an optional list of GTIDs taken from the binlog. It is used
  when a server that used to be a master connects to a new master as a
  slave: for each domain_id, a binlog GTID that was logged with our own
  server_id _and_ has a higher seq_no than the slave state is used as the
  start position instead. This allows promoting a slave to be the new master
  and connecting the old master as a slave with MASTER_GTID_POS=AUTO.

  The actual walk over the state is done by iterate(); this function only
  supplies the callback that appends each GTID to dest.
*/
int
rpl_slave_state::tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra)
{
  struct rpl_slave_state_tostring_data cb_state;

  cb_state.dest= dest;
  cb_state.first= true;

  int res= iterate(rpl_slave_state_tostring_cb, &cb_state,
                   extra_gtids, num_extra);
  return res;
}
/* /*
Lookup a domain_id in the current replication slave state. Lookup a domain_id in the current replication slave state.
...@@ -626,9 +648,6 @@ rpl_slave_state::domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid) ...@@ -626,9 +648,6 @@ rpl_slave_state::domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid)
Parse a GTID at the start of a string, and update the pointer to point Parse a GTID at the start of a string, and update the pointer to point
at the first character after the parsed GTID. at the first character after the parsed GTID.
GTID can be in short form with domain_id=0 implied, SERVERID-SEQNO.
Or long form, DOMAINID-SERVERID-SEQNO.
Returns 0 on ok, non-zero on parse error. Returns 0 on ok, non-zero on parse error.
*/ */
static int static int
...@@ -1217,7 +1236,7 @@ slave_connection_state::load(char *slave_request, size_t len) ...@@ -1217,7 +1236,7 @@ slave_connection_state::load(char *slave_request, size_t len)
rpl_gtid *gtid; rpl_gtid *gtid;
const rpl_gtid *gtid2; const rpl_gtid *gtid2;
my_hash_reset(&hash); reset();
p= slave_request; p= slave_request;
end= slave_request + len; end= slave_request + len;
if (p == end) if (p == end)
...@@ -1270,7 +1289,7 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count) ...@@ -1270,7 +1289,7 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count)
{ {
uint32 i; uint32 i;
my_hash_reset(&hash); reset();
for (i= 0; i < count; ++i) for (i= 0; i < count; ++i)
if (update(&gtid_list[i])) if (update(&gtid_list[i]))
return 1; return 1;
...@@ -1278,6 +1297,28 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count) ...@@ -1278,6 +1297,28 @@ slave_connection_state::load(const rpl_gtid *gtid_list, uint32 count)
} }
/*
  Callback for rpl_slave_state::iterate(): passes one GTID to update() on the
  slave_connection_state given as callback data and returns update()'s
  result.
*/
static int
slave_connection_state_load_cb(rpl_gtid *gtid, void *data)
{
  slave_connection_state *target= static_cast<slave_connection_state *>(data);
  int res= target->update(gtid);
  return res;
}
/*
  Fill this slave_connection_state from a rpl_slave_state.

  Visits the same GTIDs as rpl_slave_state::tostring() (both go through
  rpl_slave_state::iterate() with the same extra_gtids/num_extra arguments),
  but calls update() on this object for each of them instead of building a
  string. Any previous contents are discarded first.

  Returns the result of iterate().
*/
int
slave_connection_state::load(rpl_slave_state *state,
                             rpl_gtid *extra_gtids, uint32 num_extra)
{
  /* Start from an empty hash; entries are added by the callback. */
  reset();

  int res= state->iterate(slave_connection_state_load_cb, this,
                          extra_gtids, num_extra);
  return res;
}
rpl_gtid * rpl_gtid *
slave_connection_state::find(uint32 domain_id) slave_connection_state::find(uint32 domain_id)
{ {
......
...@@ -93,6 +93,8 @@ struct rpl_slave_state ...@@ -93,6 +93,8 @@ struct rpl_slave_state
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
bool in_transaction, bool in_statement); bool in_transaction, bool in_statement);
uint64 next_subid(uint32 domain_id); uint64 next_subid(uint32 domain_id);
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra);
int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra); int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra);
bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid); bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid);
int load(THD *thd, char *state_from_master, size_t len, bool reset, int load(THD *thd, char *state_from_master, size_t len, bool reset,
...@@ -178,8 +180,10 @@ struct slave_connection_state ...@@ -178,8 +180,10 @@ struct slave_connection_state
slave_connection_state(); slave_connection_state();
~slave_connection_state(); ~slave_connection_state();
void reset() { my_hash_reset(&hash); }
int load(char *slave_request, size_t len); int load(char *slave_request, size_t len);
int load(const rpl_gtid *gtid_list, uint32 count); int load(const rpl_gtid *gtid_list, uint32 count);
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
rpl_gtid *find(uint32 domain_id); rpl_gtid *find(uint32 domain_id);
int update(const rpl_gtid *in_gtid); int update(const rpl_gtid *in_gtid);
void remove(const rpl_gtid *gtid); void remove(const rpl_gtid *gtid);
......
...@@ -38,7 +38,8 @@ Master_info::Master_info(LEX_STRING *connection_name_arg, ...@@ -38,7 +38,8 @@ Master_info::Master_info(LEX_STRING *connection_name_arg,
connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0), connect_retry(DEFAULT_CONNECT_RETRY), inited(0), abort_slave(0),
slave_running(0), slave_run_id(0), sync_counter(0), slave_running(0), slave_run_id(0), sync_counter(0),
heartbeat_period(0), received_heartbeats(0), master_id(0), heartbeat_period(0), received_heartbeats(0), master_id(0),
using_gtid(USE_GTID_NO) using_gtid(USE_GTID_NO), events_queued_since_last_gtid(0),
gtid_reconnect_event_skip_count(0), gtid_event_seen(false)
{ {
host[0] = 0; user[0] = 0; password[0] = 0; host[0] = 0; user[0] = 0; password[0] = 0;
ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0; ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
...@@ -147,6 +148,23 @@ void Master_info::clear_in_memory_info(bool all) ...@@ -147,6 +148,23 @@ void Master_info::clear_in_memory_info(bool all)
} }
} }
/*
  Return the textual name of a using_gtid setting: "No", "Slave_Pos" or
  "Current_Pos". Any value other than USE_GTID_NO and USE_GTID_SLAVE_POS is
  reported as "Current_Pos"; debug builds assert that it really is
  USE_GTID_CURRENT_POS.
*/
const char *
Master_info::using_gtid_astext(enum enum_using_gtid arg)
{
  if (arg == USE_GTID_NO)
    return "No";

  if (arg == USE_GTID_SLAVE_POS)
    return "Slave_Pos";

  DBUG_ASSERT(arg == USE_GTID_CURRENT_POS);
  return "Current_Pos";
}
void init_master_log_pos(Master_info* mi) void init_master_log_pos(Master_info* mi)
{ {
DBUG_ENTER("init_master_log_pos"); DBUG_ENTER("init_master_log_pos");
...@@ -154,6 +172,10 @@ void init_master_log_pos(Master_info* mi) ...@@ -154,6 +172,10 @@ void init_master_log_pos(Master_info* mi)
mi->master_log_name[0] = 0; mi->master_log_name[0] = 0;
mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number
mi->using_gtid= Master_info::USE_GTID_NO; mi->using_gtid= Master_info::USE_GTID_NO;
mi->gtid_current_pos.reset();
mi->events_queued_since_last_gtid= 0;
mi->gtid_reconnect_event_skip_count= 0;
mi->gtid_event_seen= false;
/* Intentionally init ssl_verify_server_cert to 0, no option available */ /* Intentionally init ssl_verify_server_cert to 0, no option available */
mi->ssl_verify_server_cert= 0; mi->ssl_verify_server_cert= 0;
......
...@@ -61,6 +61,10 @@ typedef struct st_mysql MYSQL; ...@@ -61,6 +61,10 @@ typedef struct st_mysql MYSQL;
class Master_info : public Slave_reporting_capability class Master_info : public Slave_reporting_capability
{ {
public: public:
enum enum_using_gtid {
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
};
Master_info(LEX_STRING *connection_name, bool is_slave_recovery); Master_info(LEX_STRING *connection_name, bool is_slave_recovery);
~Master_info(); ~Master_info();
bool shall_ignore_server_id(ulong s_id); bool shall_ignore_server_id(ulong s_id);
...@@ -70,6 +74,7 @@ class Master_info : public Slave_reporting_capability ...@@ -70,6 +74,7 @@ class Master_info : public Slave_reporting_capability
/* If malloc() in initialization failed */ /* If malloc() in initialization failed */
return connection_name.str == 0; return connection_name.str == 0;
} }
static const char *using_gtid_astext(enum enum_using_gtid arg);
/* the variables below are needed because we can change masters on the fly */ /* the variables below are needed because we can change masters on the fly */
char master_log_name[FN_REFLEN+6]; /* Room for multi-*/ char master_log_name[FN_REFLEN+6]; /* Room for multi-*/
...@@ -135,9 +140,35 @@ class Master_info : public Slave_reporting_capability ...@@ -135,9 +140,35 @@ class Master_info : public Slave_reporting_capability
Note that you can not change the numeric values of these, they are used Note that you can not change the numeric values of these, they are used
in master.info. in master.info.
*/ */
enum { enum enum_using_gtid using_gtid;
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
} using_gtid; /*
This GTID position records how far we have fetched into the relay logs.
This is used to continue fetching when the IO thread reconnects to the
master.
(Full slave stop/start does not use it, as it resets the relay logs).
*/
slave_connection_state gtid_current_pos;
/*
If events_queued_since_last_gtid is non-zero, it is the number of events
queued so far in the relaylog of a GTID-prefixed event group.
It is zero when no partial event group has been queued at the moment.
*/
uint64 events_queued_since_last_gtid;
/*
The GTID of the partially-queued event group, when
events_queued_since_last_gtid is non-zero.
*/
rpl_gtid last_queued_gtid;
/*
When slave IO thread needs to reconnect, gtid_reconnect_event_skip_count
counts number of events to skip from the first GTID-prefixed event group,
to avoid duplicating events in the relay log.
*/
uint64 gtid_reconnect_event_skip_count;
/* gtid_event_seen is false until we receive first GTID event from master. */
bool gtid_event_seen;
}; };
int init_master_info(Master_info* mi, const char* master_info_fname, int init_master_info(Master_info* mi, const char* master_info_fname,
const char* slave_info_fname, const char* slave_info_fname,
......
...@@ -6549,3 +6549,5 @@ ER_GTID_STRICT_OUT_OF_ORDER ...@@ -6549,3 +6549,5 @@ ER_GTID_STRICT_OUT_OF_ORDER
eng "An attempt was made to binlog GTID %u-%u-%llu which would create an out-of-order sequence number with existing GTID %u-%u-%llu, and gtid strict mode is enabled." eng "An attempt was made to binlog GTID %u-%u-%llu which would create an out-of-order sequence number with existing GTID %u-%u-%llu, and gtid strict mode is enabled."
ER_GTID_START_FROM_BINLOG_HOLE ER_GTID_START_FROM_BINLOG_HOLE
eng "The binlog on the master is missing the GTID %u-%u-%llu requested by the slave (even though both a prior and a subsequent sequence number does exist), and GTID strict mode is enabled" eng "The binlog on the master is missing the GTID %u-%u-%llu requested by the slave (even though both a prior and a subsequent sequence number does exist), and GTID strict mode is enabled"
ER_SLAVE_UNEXPECTED_MASTER_SWITCH
eng "Unexpected GTID received from master after reconnect. This normally indicates that the master server was replaced without restarting the slave threads. %s"
This diff is collapsed.
...@@ -2776,13 +2776,34 @@ end_with_restore_list: ...@@ -2776,13 +2776,34 @@ end_with_restore_list:
{ {
LEX_MASTER_INFO* lex_mi= &thd->lex->mi; LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
Master_info *mi; Master_info *mi;
int load_error;
load_error= rpl_load_gtid_slave_state(thd);
mysql_mutex_lock(&LOCK_active_mi); mysql_mutex_lock(&LOCK_active_mi);
if ((mi= (master_info_index-> if ((mi= (master_info_index->
get_master_info(&lex_mi->connection_name, get_master_info(&lex_mi->connection_name,
MYSQL_ERROR::WARN_LEVEL_ERROR)))) MYSQL_ERROR::WARN_LEVEL_ERROR))))
{
if (load_error)
{
/*
We cannot start a slave using GTID if we cannot load the GTID position
from the mysql.gtid_slave_pos table. But we can allow non-GTID
replication (useful eg. during upgrade).
*/
if (mi->using_gtid != Master_info::USE_GTID_NO)
{
mysql_mutex_unlock(&LOCK_active_mi);
break;
}
else
thd->clear_error();
}
if (!start_slave(thd, mi, 1 /* net report*/)) if (!start_slave(thd, mi, 1 /* net report*/))
my_ok(thd); my_ok(thd);
}
mysql_mutex_unlock(&LOCK_active_mi); mysql_mutex_unlock(&LOCK_active_mi);
break; break;
} }
......
This diff is collapsed.
...@@ -32,6 +32,8 @@ typedef struct st_slave_info ...@@ -32,6 +32,8 @@ typedef struct st_slave_info
THD* thd; THD* thd;
} SLAVE_INFO; } SLAVE_INFO;
class slave_connection_state;
extern my_bool opt_show_slave_auth_info; extern my_bool opt_show_slave_auth_info;
extern char *master_host, *master_info_file; extern char *master_host, *master_info_file;
extern bool server_id_supplied; extern bool server_id_supplied;
...@@ -70,6 +72,7 @@ void rpl_init_gtid_slave_state(); ...@@ -70,6 +72,7 @@ void rpl_init_gtid_slave_state();
void rpl_deinit_gtid_slave_state(); void rpl_deinit_gtid_slave_state();
int gtid_state_from_binlog_pos(const char *name, uint32 pos, String *out_str); int gtid_state_from_binlog_pos(const char *name, uint32 pos, String *out_str);
int rpl_append_gtid_state(String *dest, bool use_binlog); int rpl_append_gtid_state(String *dest, bool use_binlog);
int rpl_load_gtid_state(slave_connection_state *state, bool use_binlog);
bool rpl_gtid_pos_check(THD *thd, char *str, size_t len); bool rpl_gtid_pos_check(THD *thd, char *str, size_t len);
bool rpl_gtid_pos_update(THD *thd, char *str, size_t len); bool rpl_gtid_pos_update(THD *thd, char *str, size_t len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment