Commit 84a37cbf authored by Kent Overstreet

six locks: Wakeup now takes lock on behalf of waiter

This brings back an important optimization, to avoid touching the wait
lists an extra time, while preserving the property that a thread is on a
lock waitlist iff it is waiting - it is never removed from the waitlist
until it has the lock.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent e4b7254c
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#define six_acquire(l, t, r) lock_acquire(l, 0, t, r, 1, NULL, _RET_IP_) #define six_acquire(l, t, r) lock_acquire(l, 0, t, r, 1, NULL, _RET_IP_)
#define six_release(l) lock_release(l, _RET_IP_) #define six_release(l) lock_release(l, _RET_IP_)
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
struct six_lock_vals { struct six_lock_vals {
/* Value we add to the lock in order to take the lock: */ /* Value we add to the lock in order to take the lock: */
u64 lock_val; u64 lock_val;
...@@ -67,14 +69,15 @@ struct six_lock_vals { ...@@ -67,14 +69,15 @@ struct six_lock_vals {
} }
static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
union six_lock_state old) union six_lock_state old,
struct task_struct *owner)
{ {
if (type != SIX_LOCK_intent) if (type != SIX_LOCK_intent)
return; return;
if (!old.intent_lock) { if (!old.intent_lock) {
EBUG_ON(lock->owner); EBUG_ON(lock->owner);
lock->owner = current; lock->owner = owner;
} else { } else {
EBUG_ON(lock->owner != current); EBUG_ON(lock->owner != current);
} }
...@@ -93,47 +96,17 @@ static inline unsigned pcpu_read_count(struct six_lock *lock) ...@@ -93,47 +96,17 @@ static inline unsigned pcpu_read_count(struct six_lock *lock)
/* This is probably up there with the more evil things I've done */ /* This is probably up there with the more evil things I've done */
#define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l)) #define waitlist_bitnr(id) ilog2((((union six_lock_state) { .waiters = 1 << (id) }).l))
static inline void six_lock_wakeup(struct six_lock *lock, static int __do_six_trylock_type(struct six_lock *lock,
union six_lock_state state,
enum six_lock_type lock_type)
{
struct six_lock_waiter *w;
bool found = false;
if (lock_type == SIX_LOCK_write && state.read_lock)
return;
if (!(state.waiters & (1 << lock_type)))
return;
raw_spin_lock(&lock->wait_lock);
list_for_each_entry(w, &lock->wait_list, list) {
if (w->lock_want != lock_type)
continue;
found = true;
wake_up_process(w->task);
if (lock_type != SIX_LOCK_read)
break;
}
if (!found)
clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
raw_spin_unlock(&lock->wait_lock);
}
static __always_inline bool do_six_trylock_type(struct six_lock *lock,
enum six_lock_type type, enum six_lock_type type,
struct task_struct *task,
bool try) bool try)
{ {
const struct six_lock_vals l[] = LOCK_VALS; const struct six_lock_vals l[] = LOCK_VALS;
union six_lock_state old, new; union six_lock_state old, new;
bool ret; int ret;
u64 v; u64 v;
EBUG_ON(type == SIX_LOCK_write && lock->owner != current); EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1)); EBUG_ON(type == SIX_LOCK_write && (lock->state.seq & 1));
EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking))); EBUG_ON(type == SIX_LOCK_write && (try != !(lock->state.write_locking)));
...@@ -153,7 +126,6 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, ...@@ -153,7 +126,6 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock,
*/ */
if (type == SIX_LOCK_read && lock->readers) { if (type == SIX_LOCK_read && lock->readers) {
retry:
preempt_disable(); preempt_disable();
this_cpu_inc(*lock->readers); /* signal that we own lock */ this_cpu_inc(*lock->readers); /* signal that we own lock */
...@@ -171,28 +143,7 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, ...@@ -171,28 +143,7 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock,
* spurious trylock failure: * spurious trylock failure:
*/ */
if (old.write_locking) if (old.write_locking)
six_lock_wakeup(lock, old, SIX_LOCK_write); ret = -1 - SIX_LOCK_write;
/*
* If we failed from the lock path and the waiting bit wasn't
* set, set it:
*/
if (!try && !ret) {
v = old.v;
do {
new.v = old.v = v;
if (!(old.v & l[type].lock_fail))
goto retry;
if (new.waiters & (1 << type))
break;
new.waiters |= 1 << type;
} while ((v = atomic64_cmpxchg(&lock->state.counter,
old.v, new.v)) != old.v);
}
} else if (type == SIX_LOCK_write && lock->readers) { } else if (type == SIX_LOCK_write && lock->readers) {
if (try) { if (try) {
atomic64_add(__SIX_VAL(write_locking, 1), atomic64_add(__SIX_VAL(write_locking, 1),
...@@ -222,7 +173,8 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, ...@@ -222,7 +173,8 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock,
if (try && !ret) { if (try && !ret) {
old.v = atomic64_add_return(v, &lock->state.counter); old.v = atomic64_add_return(v, &lock->state.counter);
six_lock_wakeup(lock, old, SIX_LOCK_read); if (old.waiters & (1 << SIX_LOCK_read))
ret = -1 - SIX_LOCK_read;
} else { } else {
atomic64_add(v, &lock->state.counter); atomic64_add(v, &lock->state.counter);
} }
...@@ -248,14 +200,84 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock, ...@@ -248,14 +200,84 @@ static __always_inline bool do_six_trylock_type(struct six_lock *lock,
EBUG_ON(ret && !(lock->state.v & l[type].held_mask)); EBUG_ON(ret && !(lock->state.v & l[type].held_mask));
} }
if (ret) if (ret > 0)
six_set_owner(lock, type, old); six_set_owner(lock, type, old, task);
EBUG_ON(type == SIX_LOCK_write && (try || ret) && (lock->state.write_locking)); EBUG_ON(type == SIX_LOCK_write && (try || ret > 0) && (lock->state.write_locking));
return ret; return ret;
} }
/*
 * Hand @lock to waiters that want @lock_type, taking it on their behalf.
 *
 * Walks lock->wait_list under lock->wait_lock.  For each waiter whose
 * lock_want matches, the lock is tried via __do_six_trylock_type() with the
 * waiter's task as the owner; on success the waiter is unlinked from the
 * list, flagged with lock_acquired and woken.
 *
 * For lock types other than SIX_LOCK_read at most one waiter is processed
 * per pass.  The waiters bit for @lock_type is cleared only when the walk
 * reaches the end of the list; every early exit leaves it set.
 *
 * A negative return from __do_six_trylock_type() encodes another lock type
 * whose waiters need waking (-1 - type); in that case the walk is restarted
 * for that type after wait_lock has been dropped.
 */
static inline void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
struct six_lock_waiter *w, *next;
struct task_struct *task;
bool saw_one;
int ret;
again:
/* Reset per-pass state: we may arrive here again with a new lock_type. */
ret = 0;
saw_one = false;
raw_spin_lock(&lock->wait_lock);
list_for_each_entry_safe(w, next, &lock->wait_list, list) {
if (w->lock_want != lock_type)
continue;
/*
 * Non-read types: one waiter was already handled and another is still
 * queued, so bail out without clearing the waiters bit.
 */
if (saw_one && lock_type != SIX_LOCK_read)
goto unlock;
saw_one = true;
/* Take the lock for the waiter; ret > 0 means it is now held for w->task. */
ret = __do_six_trylock_type(lock, lock_type, w->task, false);
/* 0: lock not available; < 0: not taken, another type needs waking (see below). */
if (ret <= 0)
goto unlock;
/*
 * NOTE(review): __list_del() rather than list_del() - presumably so that
 * nothing is written into @w's own list node; confirm.
 */
__list_del(w->list.prev, w->list.next);
/*
 * Read the task pointer before publishing lock_acquired.
 * NOTE(review): presumably @w may no longer be valid once the waiter
 * sees lock_acquired - confirm against the waiter's lifetime.
 */
task = w->task;
/*
* Do no writes to @w besides setting lock_acquired - otherwise
* we would need a memory barrier:
*/
barrier();
w->lock_acquired = true;
wake_up_process(task);
}
/* Walk completed: no waiter of this type is left on the list. */
clear_bit(waitlist_bitnr(lock_type), (unsigned long *) &lock->state.v);
unlock:
raw_spin_unlock(&lock->wait_lock);
/* Decode the lock type requested by the failed trylock and run another pass for it. */
if (ret < 0) {
lock_type = -ret - 1;
goto again;
}
}
static inline void six_lock_wakeup(struct six_lock *lock,
union six_lock_state state,
enum six_lock_type lock_type)
{
if (lock_type == SIX_LOCK_write && state.read_lock)
return;
if (!(state.waiters & (1 << lock_type)))
return;
__six_lock_wakeup(lock, lock_type);
}
/*
 * Try to take @lock in mode @type for the current task.
 *
 * Wraps __do_six_trylock_type(), which returns a positive value when the lock
 * was taken, zero when it was not, and a negative value (-1 - lock type) when
 * it was not taken and waiters of the encoded lock type must be woken.  The
 * wakeup is performed here, so callers only see success or failure.
 *
 * Returns true if the lock was acquired.
 */
static bool do_six_trylock_type(struct six_lock *lock,
				enum six_lock_type type,
				bool try)
{
	int res = __do_six_trylock_type(lock, type, current, try);

	if (res > 0)
		return true;

	if (res < 0)
		__six_lock_wakeup(lock, -res - 1);

	return false;
}
__always_inline __flatten __always_inline __flatten
static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type) static bool __six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{ {
...@@ -315,7 +337,7 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type, ...@@ -315,7 +337,7 @@ static bool __six_relock_type(struct six_lock *lock, enum six_lock_type type,
old.v, old.v,
old.v + l[type].lock_val)) != old.v); old.v + l[type].lock_val)) != old.v);
six_set_owner(lock, type, old); six_set_owner(lock, type, old, current);
if (type != SIX_LOCK_write) if (type != SIX_LOCK_write)
six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read); six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read);
return true; return true;
...@@ -457,21 +479,24 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty ...@@ -457,21 +479,24 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
smp_mb__after_atomic(); smp_mb__after_atomic();
} }
ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
if (ret)
goto out_before_sleep;
if (six_optimistic_spin(lock, type)) if (six_optimistic_spin(lock, type))
goto out_before_sleep; goto out;
lock_contended(&lock->dep_map, _RET_IP_); lock_contended(&lock->dep_map, _RET_IP_);
wait->task = current; wait->task = current;
wait->lock_want = type; wait->lock_want = type;
wait->lock_acquired = false;
raw_spin_lock(&lock->wait_lock); raw_spin_lock(&lock->wait_lock);
if (!(lock->state.waiters & (1 << type))) if (!(lock->state.waiters & (1 << type)))
set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v); set_bit(waitlist_bitnr(type), (unsigned long *) &lock->state.v);
/*
* Retry taking the lock after taking waitlist lock, have raced with an
* unlock:
*/
ret = __do_six_trylock_type(lock, type, current, false);
if (ret <= 0) {
wait->start_time = local_clock(); wait->start_time = local_clock();
if (!list_empty(&lock->wait_list)) { if (!list_empty(&lock->wait_list)) {
...@@ -484,27 +509,43 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty ...@@ -484,27 +509,43 @@ static int __six_lock_type_slowpath(struct six_lock *lock, enum six_lock_type ty
} }
list_add_tail(&wait->list, &lock->wait_list); list_add_tail(&wait->list, &lock->wait_list);
}
raw_spin_unlock(&lock->wait_lock); raw_spin_unlock(&lock->wait_lock);
if (unlikely(ret > 0)) {
ret = 0;
goto out;
}
if (unlikely(ret < 0)) {
__six_lock_wakeup(lock, -ret - 1);
ret = 0;
}
while (1) { while (1) {
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
if (do_six_trylock_type(lock, type, false))
if (wait->lock_acquired)
break; break;
ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
if (ret) if (unlikely(ret)) {
raw_spin_lock(&lock->wait_lock);
if (!wait->lock_acquired)
list_del(&wait->list);
raw_spin_unlock(&lock->wait_lock);
if (wait->lock_acquired)
do_six_unlock_type(lock, type);
break; break;
}
schedule(); schedule();
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
out:
raw_spin_lock(&lock->wait_lock); if (ret && type == SIX_LOCK_write && lock->state.write_locking) {
list_del(&wait->list);
raw_spin_unlock(&lock->wait_lock);
out_before_sleep:
if (ret && type == SIX_LOCK_write) {
old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1), old.v = atomic64_sub_return(__SIX_VAL(write_locking, 1),
&lock->state.counter); &lock->state.counter);
six_lock_wakeup(lock, old, SIX_LOCK_read); six_lock_wakeup(lock, old, SIX_LOCK_read);
...@@ -546,27 +587,13 @@ static int __six_lock_type(struct six_lock *lock, enum six_lock_type type, ...@@ -546,27 +587,13 @@ static int __six_lock_type(struct six_lock *lock, enum six_lock_type type,
} }
__always_inline __flatten __always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{ {
const struct six_lock_vals l[] = LOCK_VALS; const struct six_lock_vals l[] = LOCK_VALS;
union six_lock_state state; union six_lock_state state;
EBUG_ON(type == SIX_LOCK_write && if (type == SIX_LOCK_intent)
!(lock->state.v & __SIX_LOCK_HELD_intent));
if (type != SIX_LOCK_write)
six_release(&lock->dep_map);
if (type == SIX_LOCK_intent) {
EBUG_ON(lock->owner != current);
if (lock->intent_lock_recurse) {
--lock->intent_lock_recurse;
return;
}
lock->owner = NULL; lock->owner = NULL;
}
if (type == SIX_LOCK_read && if (type == SIX_LOCK_read &&
lock->readers) { lock->readers) {
...@@ -583,6 +610,27 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type) ...@@ -583,6 +610,27 @@ static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
six_lock_wakeup(lock, state, l[type].unlock_wakeup); six_lock_wakeup(lock, state, l[type].unlock_wakeup);
} }
__always_inline __flatten
static void __six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
EBUG_ON(type == SIX_LOCK_write &&
!(lock->state.v & __SIX_LOCK_HELD_intent));
EBUG_ON((type == SIX_LOCK_write ||
type == SIX_LOCK_intent) &&
lock->owner != current);
if (type != SIX_LOCK_write)
six_release(&lock->dep_map);
if (type == SIX_LOCK_intent &&
lock->intent_lock_recurse) {
--lock->intent_lock_recurse;
return;
}
do_six_unlock_type(lock, type);
}
#define __SIX_LOCK(type) \ #define __SIX_LOCK(type) \
bool six_trylock_##type(struct six_lock *lock) \ bool six_trylock_##type(struct six_lock *lock) \
{ \ { \
...@@ -654,7 +702,7 @@ bool six_lock_tryupgrade(struct six_lock *lock) ...@@ -654,7 +702,7 @@ bool six_lock_tryupgrade(struct six_lock *lock)
if (lock->readers) if (lock->readers)
this_cpu_dec(*lock->readers); this_cpu_dec(*lock->readers);
six_set_owner(lock, SIX_LOCK_intent, old); six_set_owner(lock, SIX_LOCK_intent, old, current);
return true; return true;
} }
...@@ -713,8 +761,13 @@ EXPORT_SYMBOL_GPL(six_lock_increment); ...@@ -713,8 +761,13 @@ EXPORT_SYMBOL_GPL(six_lock_increment);
void six_lock_wakeup_all(struct six_lock *lock) void six_lock_wakeup_all(struct six_lock *lock)
{ {
union six_lock_state state = lock->state;
struct six_lock_waiter *w; struct six_lock_waiter *w;
six_lock_wakeup(lock, state, SIX_LOCK_read);
six_lock_wakeup(lock, state, SIX_LOCK_intent);
six_lock_wakeup(lock, state, SIX_LOCK_write);
raw_spin_lock(&lock->wait_lock); raw_spin_lock(&lock->wait_lock);
list_for_each_entry(w, &lock->wait_list, list) list_for_each_entry(w, &lock->wait_list, list)
wake_up_process(w->task); wake_up_process(w->task);
......
...@@ -110,11 +110,10 @@ struct six_lock { ...@@ -110,11 +110,10 @@ struct six_lock {
union six_lock_state state; union six_lock_state state;
unsigned intent_lock_recurse; unsigned intent_lock_recurse;
struct task_struct *owner; struct task_struct *owner;
unsigned __percpu *readers;
#ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
struct optimistic_spin_queue osq; struct optimistic_spin_queue osq;
#endif #endif
unsigned __percpu *readers;
raw_spinlock_t wait_lock; raw_spinlock_t wait_lock;
struct list_head wait_list; struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
...@@ -126,6 +125,7 @@ struct six_lock_waiter { ...@@ -126,6 +125,7 @@ struct six_lock_waiter {
struct list_head list; struct list_head list;
struct task_struct *task; struct task_struct *task;
enum six_lock_type lock_want; enum six_lock_type lock_want;
bool lock_acquired;
u64 start_time; u64 start_time;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment