Commit 8128a468 authored by Vlad Lesin

MDEV-28709 unexpected X lock on Supremum in READ COMMITTED

The lock is created during page splitting, after the records and locks have
been moved to the new page (lock_move_rec_list_(start|end)()), when the locks
of the successor of the infimum on the right page are inherited to the
supremum of the left page.

There is no need for such inheritance at the READ COMMITTED isolation level
for not-gap locks, so the fix is to add the corresponding condition to the
gap lock inheritance function.

One more fix is to forbid gap lock inheritance once an XA transaction has been
prepared. The most significant bit of trx_t::n_ref is used to indicate that
gap lock inheritance is forbidden. This fix is based on
mysql/mysql-server@b063e52a8367dc9d5ed418e7f6d96400867e9f43
parent ce2825a8
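
A note for readers unfamiliar with the counter-plus-flag packing the message
describes: the idea is to keep the 31-bit reference count and the "skip gap
lock inheritance" flag in a single atomic word, so both can be read and
updated without extra synchronization. Below is a minimal standalone C++
sketch of that technique only; the class and member names are illustrative
and this is not the trx_t implementation.

#include <atomic>
#include <cassert>
#include <cstdint>

// Sketch only: low 31 bits = reference count, most significant bit = flag.
class ref_count_and_flag
{
  static constexpr uint32_t FLAG= 1U << 31;
  std::atomic<uint32_t> word{0};
public:
  bool is_referenced() const { return (word.load() & ~FLAG) > 0; }
  bool skip_inheritance() const { return word.load() >> 31; }
  void reference() { word.fetch_add(1); }       // flag bit stays untouched
  void release_reference()
  {
    uint32_t old= word.fetch_sub(1);
    assert((old & ~FLAG) > 0);                  // count must not underflow
  }
  void set_skip() { word.fetch_or(FLAG); }      // set the MSB only
  void reset_skip() { word.fetch_and(~FLAG); }  // clear the MSB, keep count
};

int main()
{
  ref_count_and_flag r;
  r.reference();
  r.set_skip();
  assert(r.is_referenced() && r.skip_inheritance());
  r.reset_skip();
  r.release_reference();
  assert(!r.is_referenced() && !r.skip_inheritance());
}

The actual patch (trx0trx.h hunks below) implements the same idea with
MariaDB's Atomic_relaxed wrapper and debug-only assertions (ut_d/ut_ad).
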
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
REPLACE INTO t VALUES(30, 1);
connect con1,localhost,root;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
connection default;
XA END '1';
XA PREPARE '1';
connection default;
XA COMMIT '1';
connection con1;
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
disconnect con1;
connection default;
SET DEBUG_SYNC= "RESET";
DROP TABLE t;
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/count_sessions.inc
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
--disable_query_log
SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
--enable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
# We need the following sync point because mysql_insert() resets
# trx->duplicates under the following condition:
#
#   if (duplic == DUP_REPLACE &&
#       (!table->triggers || !table->triggers->has_delete_triggers()))
#     table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
#
# i.e. ha_innobase::extra() resets trx_t::duplicates, but we need
# lock_update_split_right() to be invoked while trx->duplicates is still set
# in order to repeat the bug. So the transaction hangs just after the
# row_insert_for_mysql() call until another transaction inserts a new row and
# splits the page.
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
--send REPLACE INTO t VALUES(30, 1)
connect (con1,localhost,root);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
# The following statement will cause a page split, and (20, ...) will be the
# split record. As the previous REPLACE set a non-gap X-lock on it,
# lock_update_split_right() and lock_rec_inherit_to_gap() will 'inherit' the
# lock from the first (20, ...) record of the new right page to the supremum
# of the old left page, which should not happen at the READ COMMITTED
# isolation level.
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
--connection default
--reap
XA END '1';
# Without the fix, this would cause an assertion failure, because the supremum
# of the left page would have an X-lock.
XA PREPARE '1';
--connection default
XA COMMIT '1';
--connection con1
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
--disconnect con1
--connection default
SET DEBUG_SYNC= "RESET";
DROP TABLE t;
--disable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
--enable_query_log
--source include/wait_until_count_sessions.inc
storage/innobase/include/trx0trx.h

@@ -646,14 +646,19 @@ struct trx_rsegs_t {
 struct trx_t : ilist_node<> {
 private:
   /**
-  Count of references.
+  Least significant 31 bits is count of references.
 
   We can't release the locks nor commit the transaction until this reference
   is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
   that it is no longer "active".
+
+  If the most significant bit is set this transaction should stop inheriting
+  (GAP)locks. Generally set to true during transaction prepare for RC or lower
+  isolation, if requested. Needed for replication replay where
+  we don't want to get blocked on GAP locks taken for protecting
+  concurrent unique insert or replace operation.
   */
-  Atomic_counter<int32_t> n_ref;
+  Atomic_relaxed<uint32_t> skip_lock_inheritance_and_n_ref;
 
 public:

@@ -983,27 +988,47 @@ struct trx_t : ilist_node<> {
   /** Commit the transaction. */
   void commit();
 
-  bool is_referenced() const { return n_ref > 0; }
+  bool is_referenced() const
+  {
+    return (skip_lock_inheritance_and_n_ref & ~(1U << 31)) > 0;
+  }
 
   void reference()
   {
-#ifdef UNIV_DEBUG
-    auto old_n_ref=
-#endif
-    n_ref++;
-    ut_ad(old_n_ref >= 0);
+    ut_d(auto old_n_ref =)
+        skip_lock_inheritance_and_n_ref.fetch_add(1);
+    ut_ad(int32_t(old_n_ref << 1) >= 0);
   }
 
   void release_reference()
   {
-#ifdef UNIV_DEBUG
-    auto old_n_ref=
-#endif
-    n_ref--;
-    ut_ad(old_n_ref > 0);
+    ut_d(auto old_n_ref =)
+        skip_lock_inheritance_and_n_ref.fetch_sub(1);
+    ut_ad(int32_t(old_n_ref << 1) > 0);
+  }
+
+  bool is_not_inheriting_locks() const
+  {
+    return skip_lock_inheritance_and_n_ref >> 31;
+  }
+
+  void set_skip_lock_inheritance()
+  {
+    ut_d(auto old_n_ref=) skip_lock_inheritance_and_n_ref.fetch_add(1U << 31);
+    ut_ad(!(old_n_ref >> 31));
+  }
+
+  void reset_skip_lock_inheritance()
+  {
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    __asm__("lock btrl $31, %0" : : "m"(skip_lock_inheritance_and_n_ref));
+#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+    _interlockedbittestandreset(
+        reinterpret_cast<volatile long *>(&skip_lock_inheritance_and_n_ref),
+        31);
+#else
+    skip_lock_inheritance_and_n_ref.fetch_and(~1U << 31);
+#endif
   }
 
   /** @return whether the table has lock on

@@ -1031,6 +1056,7 @@ struct trx_t : ilist_node<> {
     ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
     ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
     ut_ad(dict_operation == TRX_DICT_OP_NONE);
+    ut_ad(!is_not_inheriting_locks());
   }
 
   /** @return whether this is a non-locking autocommit transaction */
storage/innobase/lock/lock0lock.cc

@@ -2291,29 +2291,26 @@ lock_rec_reset_and_release_wait(
 		&lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
 }
 
-/*************************************************************//**
-Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
+/** Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
 of another record as gap type locks, but does not reset the lock bits of
 the other record. Also waiting lock requests on rec are inherited as
-GRANTED gap locks. */
-static
-void
-lock_rec_inherit_to_gap(
-/*====================*/
-	const buf_block_t*	heir_block,	/*!< in: block containing the
-						record which inherits */
-	const buf_block_t*	block,		/*!< in: block containing the
-						record from which inherited;
-						does NOT reset the locks on
-						this record */
-	ulint			heir_heap_no,	/*!< in: heap_no of the
-						inheriting record */
-	ulint			heap_no)	/*!< in: heap_no of the
-						donating record */
+GRANTED gap locks.
+@param heir_block   block containing the record which inherits
+@param block        block containing the record from which inherited; does NOT
+                    reset the locks on this record
+@param heir_heap_no heap_no of the inheriting record
+@param heap_no      heap_no of the donating record
+@tparam from_split  true if the function is invoked from
+                    lock_update_split_(left|right)(), in this case not-gap
+                    locks are not inherited to supremum if transaction
+                    isolation level less or equal to READ COMMITTED */
+template <bool from_split= false>
+static void lock_rec_inherit_to_gap(const buf_block_t *heir_block,
+                                    const buf_block_t *block,
+                                    ulint heir_heap_no, ulint heap_no)
 {
-	lock_t*	lock;
-
-	ut_ad(lock_mutex_own());
+  ut_ad(lock_mutex_own());
+  ut_ad(!from_split || heir_heap_no == PAGE_HEAP_NO_SUPREMUM);
 
   /* At READ UNCOMMITTED or READ COMMITTED isolation level,
   we do not want locks set

@@ -2321,18 +2318,25 @@ lock_rec_inherit_to_gap(
   DO want S-locks/X-locks(taken for replace) set by a consistency
   constraint to be inherited also then. */
 
-	for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
-	     lock != NULL;
-	     lock = lock_rec_get_next(heap_no, lock)) {
-
-		if (!lock_rec_get_insert_intention(lock)
-		    && (lock->trx->isolation_level > TRX_ISO_READ_COMMITTED
-			|| lock_get_mode(lock) !=
-			(lock->trx->duplicates ? LOCK_S : LOCK_X))) {
-			lock_rec_add_to_queue(
-				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
-				heir_block, heir_heap_no, lock->index,
-				lock->trx, FALSE);
+  for (lock_t *lock= lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
+       lock != NULL; lock= lock_rec_get_next(heap_no, lock))
+  {
+    if (!lock->trx->is_not_inheriting_locks() &&
+        !lock_rec_get_insert_intention(lock) &&
+        (lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
+         /* When we are in a page split (not purge), then we don't set a lock
+         on supremum if the donor lock type is LOCK_REC_NOT_GAP. That is, do
+         not create bogus gap locks for non-gap locks for READ UNCOMMITTED and
+         READ COMMITTED isolation levels. LOCK_ORDINARY and
+         LOCK_GAP require a gap before the record to be locked, that is why
+         setting lock on supremmum is necessary. */
+         ((!from_split || !lock->is_record_not_gap()) &&
+          (lock_get_mode(lock) != (lock->trx->duplicates ? LOCK_S : LOCK_X)))))
+    {
+      lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock),
+                            heir_block, heir_heap_no, lock->index, lock->trx,
+                            FALSE);
     }
   }
 }

@@ -2361,7 +2365,8 @@ lock_rec_inherit_to_gap_if_gap_lock(
 	     lock != NULL;
 	     lock = lock_rec_get_next(heap_no, lock)) {
 
-		if (!lock_rec_get_insert_intention(lock)
+		if (!lock->trx->is_not_inheriting_locks()
+		    && !lock_rec_get_insert_intention(lock)
 		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
 			|| !lock_rec_get_rec_not_gap(lock))) {

@@ -2943,7 +2948,7 @@ lock_update_split_right(
 	/* Inherit the locks to the supremum of left page from the successor
 	of the infimum on right page */
 
-	lock_rec_inherit_to_gap(left_block, right_block,
+	lock_rec_inherit_to_gap<true>(left_block, right_block,
 				PAGE_HEAP_NO_SUPREMUM, heap_no);
 
 	lock_mutex_exit();

@@ -3063,7 +3068,7 @@ lock_update_split_left(
 	/* Inherit the locks to the supremum of the left page from the
 	successor of the infimum on the right page */
 
-	lock_rec_inherit_to_gap(left_block, right_block,
+	lock_rec_inherit_to_gap<true>(left_block, right_block,
 				PAGE_HEAP_NO_SUPREMUM, heap_no);
 
 	lock_mutex_exit();

@@ -4251,6 +4256,11 @@ void lock_release_on_prepare(trx_t *trx)
       {
         ut_ad(trx->dict_operation ||
               lock->index->table->id >= DICT_HDR_FIRST_ID);
+        ut_ad(lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
+              /* Insert-intention lock is valid for supremum for isolation
+              level > TRX_ISO_READ_COMMITTED */
+              lock_get_mode(lock) == LOCK_X ||
+              !lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM));
 retain_lock:
         lock= UT_LIST_GET_PREV(trx_locks, lock);
         continue;

@@ -4287,6 +4297,8 @@ void lock_release_on_prepare(trx_t *trx)
   }
 
   lock_mutex_exit();
+
+  trx->set_skip_lock_inheritance();
 }
 
 /* True if a lock mode is S or X */
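
An aside on the template <bool from_split= false> parameter introduced above:
the flag is a compile-time constant, so existing callers keep the old
behaviour untouched and only the page-split paths opt in via
lock_rec_inherit_to_gap<true>(...). A minimal standalone sketch of this
pattern follows; the function name and the simplified condition are
illustrative only, not the InnoDB code.

#include <cstdio>

// Compile-time flag with a default, mirroring the from_split parameter.
template <bool from_split= false>
static void inherit_to_gap(bool lock_is_not_gap, bool read_committed)
{
  // On the page-split path at READ COMMITTED or lower, a not-gap lock is
  // not turned into a gap lock on the supremum.
  if (from_split && lock_is_not_gap && read_committed)
  {
    std::puts("skip inheritance");
    return;
  }
  std::puts("inherit as gap lock");
}

int main()
{
  inherit_to_gap(true, true);        // default path: inherits
  inherit_to_gap<true>(true, true);  // page-split path: skips
}
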
storage/innobase/trx/trx0trx.cc

@@ -412,7 +412,8 @@ void trx_t::free()
   mod_tables.clear();
 
-  MEM_NOACCESS(&n_ref, sizeof n_ref);
+  MEM_NOACCESS(&skip_lock_inheritance_and_n_ref,
+               sizeof skip_lock_inheritance_and_n_ref);
   /* do not poison mutex */
   MEM_NOACCESS(&id, sizeof id);
   MEM_NOACCESS(&state, sizeof state);

@@ -518,6 +519,7 @@ inline void trx_t::release_locks()
   }
 
   lock.table_locks.clear();
+  reset_skip_lock_inheritance();
 }
 
 /** At shutdown, frees a transaction object. */