Commit b3529691 authored by unknown's avatar unknown

MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts

Due to how gap locks work, two transactions could group commit together on the
master, but get lock conflicts and then deadlock due to different thread
scheduling order on slave.

For now, remove these deadlocks by running the parallel slave in READ
COMMITTED mode. And let InnoDB/XtraDB allow statement-based binlogging for the
parallel slave in READ COMMITTED.

We are also investigating a different solution long-term, which is based on
relaxing the gap locks only between the transactions running in parallel for
one slave, but not against possibly external transactions.
parent a5418c55
......@@ -622,6 +622,7 @@ void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton);
void thd_storage_lock_wait(MYSQL_THD thd, long long value);
int thd_tx_isolation(const MYSQL_THD thd);
int thd_tx_is_read_only(const MYSQL_THD thd);
int thd_rpl_is_parallel(const MYSQL_THD thd);
/**
Create a temporary file.
......
......@@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
int thd_tx_is_read_only(const void* thd);
int thd_rpl_is_parallel(const void* thd);
int mysql_tmpfile(const char *prefix);
unsigned long thd_get_thread_id(const void* thd);
void thd_get_xid(const void* thd, MYSQL_XID *xid);
......
......@@ -303,6 +303,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
int thd_tx_is_read_only(const void* thd);
int thd_rpl_is_parallel(const void* thd);
int mysql_tmpfile(const char *prefix);
unsigned long thd_get_thread_id(const void* thd);
void thd_get_xid(const void* thd, MYSQL_XID *xid);
......
......@@ -256,6 +256,7 @@ void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
int thd_tx_is_read_only(const void* thd);
int thd_rpl_is_parallel(const void* thd);
int mysql_tmpfile(const char *prefix);
unsigned long thd_get_thread_id(const void* thd);
void thd_get_xid(const void* thd, MYSQL_XID *xid);
......
......@@ -761,11 +761,51 @@ a b
110 1
111 2
112 3
***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts ***
include/stop_slave.inc
CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
UPDATE t4 SET b=NULL WHERE a=6;
SET debug_sync='now WAIT_FOR master_queued1';
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
DELETE FROM t4 WHERE b <= 3;
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
SET debug_sync='RESET';
include/start_slave.inc
include/stop_slave.inc
SELECT * FROM t4 ORDER BY a;
a b
1 NULL
3 NULL
4 4
5 NULL
6 NULL
DELETE FROM t4;
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
INSERT INTO t4 VALUES (7, NULL);
SET debug_sync='now WAIT_FOR master_queued1';
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
DELETE FROM t4 WHERE b <= 3;
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
SET debug_sync='RESET';
include/start_slave.inc
SELECT * FROM t4 ORDER BY a;
a b
1 NULL
3 NULL
4 4
5 NULL
6 6
7 NULL
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
SET DEBUG_SYNC= 'RESET';
DROP function foo;
DROP TABLE t1,t2,t3;
DROP TABLE t1,t2,t3,t4;
SET DEBUG_SYNC= 'RESET';
include/rpl_end.inc
......@@ -1172,6 +1172,84 @@ START SLAVE SQL_THREAD;
SELECT * FROM t3 WHERE a >= 110 ORDER BY a;
--echo ***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts ***
--connection server_2
--source include/stop_slave.inc
--connection server_1
CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
# Create a group commit with UPDATE and DELETE, in that order.
# The bug was that while the UPDATE's row lock does not block the DELETE, the
# DELETE's gap lock _does_ block the UPDATE. This could cause a deadlock
# on the slave.
--connection con1
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
send UPDATE t4 SET b=NULL WHERE a=6;
--connection server_1
SET debug_sync='now WAIT_FOR master_queued1';
--connection con2
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
send DELETE FROM t4 WHERE b <= 3;
--connection server_1
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
--connection con1
REAP;
--connection con2
REAP;
SET debug_sync='RESET';
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
--source include/stop_slave.inc
SELECT * FROM t4 ORDER BY a;
# Another example, this one with INSERT vs. DELETE
--connection server_1
DELETE FROM t4;
INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
# Create a group commit with INSERT and DELETE, in that order.
# The bug was that while the INSERT's insert intention lock does not block
# the DELETE, the DELETE's gap lock _does_ block the INSERT. This could cause
# a deadlock on the slave.
--connection con1
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
send INSERT INTO t4 VALUES (7, NULL);
--connection server_1
SET debug_sync='now WAIT_FOR master_queued1';
--connection con2
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
send DELETE FROM t4 WHERE b <= 3;
--connection server_1
SET debug_sync='now WAIT_FOR master_queued2';
SET debug_sync='now SIGNAL master_cont1';
--connection con1
REAP;
--connection con2
REAP;
SET debug_sync='RESET';
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t4 ORDER BY a;
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
......@@ -1180,7 +1258,7 @@ SET DEBUG_SYNC= 'RESET';
--connection server_1
DROP function foo;
DROP TABLE t1,t2,t3;
DROP TABLE t1,t2,t3,t4;
SET DEBUG_SYNC= 'RESET';
--source include/rpl_end.inc
......@@ -242,6 +242,39 @@ handle_rpl_parallel_thread(void *arg)
thd_proc_info(thd, "Waiting for work from main SQL threads");
thd->set_time();
thd->variables.lock_wait_timeout= LONG_TIMEOUT;
/*
For now, we need to run the replication parallel worker threads in
READ COMMITTED. This is needed because gap locks are not symmetric.
For example, a gap lock from a DELETE blocks an insert intention lock,
but not vice versa. So an INSERT followed by DELETE can group commit
on the master, but if we are unlucky with thread scheduling we can
then deadlock on the slave because the INSERT ends up waiting for a
gap lock from the DELETE (and the DELETE in turn waits for the INSERT
in wait_for_prior_commit()). See also MDEV-5914.
It should be mostly safe to run in READ COMMITTED in the slave anyway.
The commit order is already fixed from on the master, so we do not
risk logging into the binlog in an incorrect order between worker
threads (one that would cause different results if executed on a
lower-level slave that uses this slave as a master). The only
potential problem is with transactions run in a different master
connection (using multi-source replication), or run directly on the
slave by an application; when using READ COMMITTED we are not
guaranteed serialisability of binlogged statements.
In practice, this is unlikely to be an issue. In GTID mode, such
parallel transactions from multi-source or application must in any
case use a different replication domain, in which case binlog order
by definition must be independent between the different domain. Even
in non-GTID mode, normally one will assume that the external
transactions are not conflicting with those applied by the slave, so
that isolation level should make no difference. It would be rather
strange if the result of applying query events from one master would
depend on the timing and nature of other queries executed from
different multi-source connections or done directly on the slave by
an application. Still, something to be aware of.
*/
thd->variables.tx_isolation= ISO_READ_COMMITTED;
mysql_mutex_lock(&rpt->LOCK_rpl_thread);
rpt->thd= thd;
......@@ -306,6 +339,7 @@ handle_rpl_parallel_thread(void *arg)
PSI_stage_info old_stage;
uint64 wait_count;
thd->tx_isolation= (enum_tx_isolation)thd->variables.tx_isolation;
in_event_group= true;
/*
If the standalone flag is set, then this event group consists of a
......
......@@ -4234,6 +4234,12 @@ extern "C" int thd_slave_thread(const MYSQL_THD thd)
return(thd->slave_thread);
}
/* Returns true for a worker thread in parallel replication. */
extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd)
{
return thd->rgi_slave && thd->rgi_slave->is_parallel_exec;
}
extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
{
return(thd->transaction.all.modified_non_trans_table);
......
......@@ -4223,11 +4223,14 @@ handler::Table_flags
ha_innobase::table_flags() const
/*============================*/
{
THD *thd = ha_thd();
/* Need to use tx_isolation here since table flags is (also)
called before prebuilt is inited. */
ulong const tx_isolation = thd_tx_isolation(ha_thd());
ulong const tx_isolation = thd_tx_isolation(thd);
if (tx_isolation <= ISO_READ_COMMITTED) {
if (tx_isolation <= ISO_READ_COMMITTED &&
!(tx_isolation == ISO_READ_COMMITTED &&
thd_rpl_is_parallel(thd))) {
return(int_table_flags);
}
......
......@@ -4680,11 +4680,14 @@ handler::Table_flags
ha_innobase::table_flags() const
/*============================*/
{
THD *thd = ha_thd();
/* Need to use tx_isolation here since table flags is (also)
called before prebuilt is inited. */
ulong const tx_isolation = thd_tx_isolation(ha_thd());
ulong const tx_isolation = thd_tx_isolation(thd);
if (tx_isolation <= ISO_READ_COMMITTED) {
if (tx_isolation <= ISO_READ_COMMITTED &&
!(tx_isolation == ISO_READ_COMMITTED &&
thd_rpl_is_parallel(thd))) {
return(int_table_flags);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment