Commit 35f59bc4 authored by Marko Mäkelä

MDEV-26467: More cache friendliness

srw_mutex_impl<bool>::wait_and_lock(): In
commit a73eedbf we introduced
an std::atomic::fetch_or() in a loop. Alas, on IA-32 and AMD64,
that was being translated into a loop around LOCK CMPXCHG.
To avoid a nested loop, it is better to explicitly invoke
std::atomic::compare_exchange_weak() in the loop, but only if
the attempt has a chance to succeed (the HOLDER flag is not set).
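
For illustration, a minimal standalone sketch of that pattern (toy_spin_mutex
is a made-up name, not the actual srw_mutex_impl code): the
compare_exchange_weak() is attempted only while HOLDER is observed to be
clear, so a contended lock spins on plain loads instead of on LOCK CMPXCHG.

  #include <atomic>
  #include <cstdint>

  // Illustrative spin lock: bit 31 is the HOLDER flag, the low bits count
  // registered waiters, loosely mirroring srw_mutex_impl.
  struct toy_spin_mutex
  {
    static constexpr uint32_t HOLDER= 1U << 31;
    std::atomic<uint32_t> lock{0};

    void wait_and_lock()
    {
      // Register as a waiter before spinning.
      uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
      for (;;)
      {
        if (lk & HOLDER)
          // Held by someone else: re-read with a plain load; no
          // read-modify-write instruction is issued here.
          lk= lock.load(std::memory_order_relaxed);
        else if (lock.compare_exchange_weak(lk, lk | HOLDER,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed))
          return; // acquired; on failure the CAS also refreshed lk
      }
    }

    void unlock()
    {
      // Clear HOLDER and deregister this waiter; a real implementation
      // would also wake any remaining waiters here.
      lock.fetch_sub(HOLDER + 1, std::memory_order_release);
    }
  };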

It is even more efficient to use LOCK BTS, but contemporary compilers
fail to translate std::atomic::fetch_or(x) & x into that when x is
a single-bit constant. On GCC-compatible compilers, we will use
inline assembler to achieve that.
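
A standalone sketch of that inline-assembler idea, assuming a GCC-compatible
compiler on IA-32 or AMD64 (the helper name try_set_holder is invented for
illustration; the actual change is in the diff below):

  #include <atomic>
  #include <cstdint>

  static constexpr uint32_t HOLDER= 1U << 31;

  // Atomically set bit 31 and report whether it was previously clear.
  // LOCK BTS leaves the old bit value in the carry flag, so a single
  // instruction replaces a compare-and-swap retry loop.
  inline bool try_set_holder(std::atomic<uint32_t> &word)
  {
  #if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
    static_assert(HOLDER == (1U << 31), "compatibility");
    __asm__ goto("lock btsl $31, %0\n\t"
                 "jc %l1" : : "m" (word) : "cc", "memory" : busy);
    std::atomic_thread_fence(std::memory_order_acquire);
    return true;  // the bit was clear; we now own it
  busy:
    return false; // the bit was already set
  #else
    // Portable fallback; compilers may emit a LOCK CMPXCHG loop for this.
    return !(word.fetch_or(HOLDER, std::memory_order_acquire) & HOLDER);
  #endif
  }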

On ISAs other than IA-32 and AMD64, we will continue to use
std::atomic::fetch_or().

ssux_lock_impl<spinloop>::rd_wait(): Use rd_lock_try().
A loop around std::atomic::compare_exchange_weak() should be
cheaper than fetch_add(), fetch_sub() and a wakeup system call.
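
As a sketch of that idea (not necessarily the exact rd_lock_try()
implementation; toy_rw_state is a made-up name): a try-lock written as a
compare_exchange_weak() loop simply declines to increment the reader count
while the writer bit is set, so a failed attempt needs no compensating
fetch_sub() or wake().

  #include <atomic>
  #include <cstdint>

  struct toy_rw_state
  {
    static constexpr uint32_t WRITER= 1U << 31;
    std::atomic<uint32_t> readers{0};

    bool rd_lock_try()
    {
      uint32_t lk= readers.load(std::memory_order_relaxed);
      do
      {
        if (lk & WRITER)
          return false;            // writer present: give up, nothing to undo
      }
      while (!readers.compare_exchange_weak(lk, lk + 1,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed));
      return true;                 // registered as one more reader
    }

    void rd_unlock() { readers.fetch_sub(1, std::memory_order_release); }
  };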

These deficiencies were pointed out and the use of LOCK BTS was
suggested by Thiago Macieira.
parent 0d68b0a2
@@ -294,57 +294,107 @@ void srw_mutex_impl<true>::wait_and_lock()
     DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
       lk= lock.load(std::memory_order_relaxed);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
+      goto acquired;
+#endif
     else
     {
-      lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-      if (!(lk & HOLDER))
-        goto acquired;
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jnc %l1" : : "m" (*this) : "cc", "memory" : acquired);
+      lk|= HOLDER;
+#endif
+      srw_pause(delay);
     }
-    srw_pause(delay);
     if (!--spin)
       break;
   }
 
-  for (;; wait(lk))
+  for (;;)
   {
+    DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+    acquired:
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
     acquired:
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
       return;
     }
-    DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
 
 template<>
 void srw_mutex_impl<false>::wait_and_lock()
 {
-  uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
-  for (;; wait(lk))
+  for (uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);;)
   {
+    DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
       return;
     }
-    DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
@@ -373,19 +423,12 @@ void ssux_lock_impl<spinloop>::rd_wait()
   for (;;)
   {
     writer.wr_lock();
-    uint32_t lk= readers.fetch_add(1, std::memory_order_acquire);
-    if (UNIV_UNLIKELY(lk == WRITER))
-    {
-      readers.fetch_sub(1, std::memory_order_relaxed);
-      wake();
-      writer.wr_unlock();
-      pthread_yield();
-      continue;
-    }
-    DBUG_ASSERT(!(lk & WRITER));
-    break;
+    bool acquired= rd_lock_try();
+    writer.wr_unlock();
+    if (acquired)
+      break;
+    std::this_thread::yield();
   }
-  writer.wr_unlock();
 }
 
 template void ssux_lock_impl<true>::rd_wait();