Commit e3f2ddea, authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] pi-futex: robust-futex exit

Fix robust PI-futexes to be properly unlocked on unexpected exit.

For this to work the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different.  Since the space in
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust list pointer.
Existing (non-PI) glibc robust futexes have this bit always zero, so the
ABI is kept.  New glibc with PI-robust-futexes will set this bit.

Further fixes from Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 627371d7
...@@ -96,7 +96,8 @@ struct robust_list_head { ...@@ -96,7 +96,8 @@ struct robust_list_head {
long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout, long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
u32 __user *uaddr2, u32 val2, u32 val3); u32 __user *uaddr2, u32 val2, u32 val3);
extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr); extern int
handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
#ifdef CONFIG_FUTEX #ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr); extern void exit_robust_list(struct task_struct *curr);
......
...@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me) ...@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
} }
/* /*
* We are the first waiter - try to look up the real owner and * We are the first waiter - try to look up the real owner and attach
* attach the new pi_state to it: * the new pi_state to it, but bail out when the owner died bit is set
* and TID = 0:
*/ */
pid = uval & FUTEX_TID_MASK; pid = uval & FUTEX_TID_MASK;
if (!pid && (uval & FUTEX_OWNER_DIED))
return -ESRCH;
p = futex_find_get_task(pid); p = futex_find_get_task(pid);
if (!p) if (!p)
return -ESRCH; return -ESRCH;
...@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) ...@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
* kept enabled while there is PI state around. We must also * kept enabled while there is PI state around. We must also
* preserve the owner died bit.) * preserve the owner died bit.)
*/ */
newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid; if (!(uval & FUTEX_OWNER_DIED)) {
newval = FUTEX_WAITERS | new_owner->pid;
inc_preempt_count(); inc_preempt_count();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
dec_preempt_count(); dec_preempt_count();
if (curval == -EFAULT) if (curval == -EFAULT)
return -EFAULT; return -EFAULT;
if (curval != uval) if (curval != uval)
return -EINVAL; return -EINVAL;
}
spin_lock_irq(&pi_state->owner->pi_lock); spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list)); WARN_ON(list_empty(&pi_state->list));
...@@ -1443,9 +1447,11 @@ static int futex_unlock_pi(u32 __user *uaddr) ...@@ -1443,9 +1447,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
* again. If it succeeds then we can return without waking * again. If it succeeds then we can return without waking
* anyone else up: * anyone else up:
*/ */
if (!(uval & FUTEX_OWNER_DIED)) {
inc_preempt_count(); inc_preempt_count();
uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
dec_preempt_count(); dec_preempt_count();
}
if (unlikely(uval == -EFAULT)) if (unlikely(uval == -EFAULT))
goto pi_faulted; goto pi_faulted;
...@@ -1478,9 +1484,11 @@ static int futex_unlock_pi(u32 __user *uaddr) ...@@ -1478,9 +1484,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
/* /*
* No waiters - kernel unlocks the futex: * No waiters - kernel unlocks the futex:
*/ */
if (!(uval & FUTEX_OWNER_DIED)) {
ret = unlock_futex_pi(uaddr, uval); ret = unlock_futex_pi(uaddr, uval);
if (ret == -EFAULT) if (ret == -EFAULT)
goto pi_faulted; goto pi_faulted;
}
out_unlock: out_unlock:
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
...@@ -1699,9 +1707,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr, ...@@ -1699,9 +1707,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
* Process a futex-list entry, check whether it's owned by the * Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so: * dying task, and do notification if so:
*/ */
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{ {
u32 uval, nval; u32 uval, nval, mval;
retry: retry:
if (get_user(uval, uaddr)) if (get_user(uval, uaddr))
...@@ -1718,17 +1726,41 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) ...@@ -1718,17 +1726,41 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
* thread-death.) The rest of the cleanup is done in * thread-death.) The rest of the cleanup is done in
* userspace. * userspace.
*/ */
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
uval | FUTEX_OWNER_DIED); nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
if (nval == -EFAULT) if (nval == -EFAULT)
return -1; return -1;
if (nval != uval) if (nval != uval)
goto retry; goto retry;
/*
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
if (!pi) {
if (uval & FUTEX_WAITERS) if (uval & FUTEX_WAITERS)
futex_wake(uaddr, 1); futex_wake(uaddr, 1);
} }
}
return 0;
}
/*
* Fetch a robust-list pointer. Bit 0 signals PI futexes:
*/
static inline int fetch_robust_entry(struct robust_list __user **entry,
struct robust_list __user **head, int *pi)
{
unsigned long uentry;
if (get_user(uentry, (unsigned long *)head))
return -EFAULT;
*entry = (void *)(uentry & ~1UL);
*pi = uentry & 1;
return 0; return 0;
} }
...@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr) ...@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
{ {
struct robust_list_head __user *head = curr->robust_list; struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *pending; struct robust_list __user *entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
unsigned long futex_offset; unsigned long futex_offset;
/* /*
* Fetch the list head (which was registered earlier, via * Fetch the list head (which was registered earlier, via
* sys_set_robust_list()): * sys_set_robust_list()):
*/ */
if (get_user(entry, &head->list.next)) if (fetch_robust_entry(&entry, &head->list.next, &pi))
return; return;
/* /*
* Fetch the relative futex offset: * Fetch the relative futex offset:
...@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr) ...@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
* Fetch any possibly pending lock-add first, and handle it * Fetch any possibly pending lock-add first, and handle it
* if it exists: * if it exists:
*/ */
if (get_user(pending, &head->list_op_pending)) if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
return; return;
if (pending) if (pending)
handle_futex_death((void *)pending + futex_offset, curr); handle_futex_death((void *)pending + futex_offset, curr, pip);
while (entry != &head->list) { while (entry != &head->list) {
/* /*
...@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr) ...@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
*/ */
if (entry != pending) if (entry != pending)
if (handle_futex_death((void *)entry + futex_offset, if (handle_futex_death((void *)entry + futex_offset,
curr)) curr, pi))
return; return;
/* /*
* Fetch the next entry in the list: * Fetch the next entry in the list:
*/ */
if (get_user(entry, &entry->next)) if (fetch_robust_entry(&entry, &entry->next, &pi))
return; return;
/* /*
* Avoid excessively long or circular lists: * Avoid excessively long or circular lists:
......
...@@ -12,6 +12,23 @@ ...@@ -12,6 +12,23 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
/*
 * Fetch a compat robust-list pointer from user space and split it
 * into its two parts. Bit 0 signals PI futexes:
 *
 * @uentry: receives the raw value read from @head via get_user()
 * @entry:  receives the value with bit 0 masked off, converted to a
 *          pointer with compat_ptr()
 * @head:   location the raw value is read from
 * @pi:     receives bit 0 of the raw value
 *
 * Returns 0 on success, -EFAULT if get_user() fails.
 */
static inline int
fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
compat_uptr_t *head, int *pi)
{
if (get_user(*uentry, head))
return -EFAULT;
/* Strip the PI flag before turning the value into a pointer: */
*entry = compat_ptr((*uentry) & ~1);
/* Hand the PI flag (bit 0) back separately: */
*pi = (unsigned int)(*uentry) & 1;
return 0;
}
/* /*
* Walk curr->robust_list (very carefully, it's a userspace list!) * Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters. * and mark any locks found there dead, and notify any waiters.
...@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr) ...@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
{ {
struct compat_robust_list_head __user *head = curr->compat_robust_list; struct compat_robust_list_head __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *pending; struct robust_list __user *entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi;
compat_uptr_t uentry, upending; compat_uptr_t uentry, upending;
unsigned int limit = ROBUST_LIST_LIMIT;
compat_long_t futex_offset; compat_long_t futex_offset;
/* /*
* Fetch the list head (which was registered earlier, via * Fetch the list head (which was registered earlier, via
* sys_set_robust_list()): * sys_set_robust_list()):
*/ */
if (get_user(uentry, &head->list.next)) if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
return; return;
entry = compat_ptr(uentry);
/* /*
* Fetch the relative futex offset: * Fetch the relative futex offset:
*/ */
...@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr) ...@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
* Fetch any possibly pending lock-add first, and handle it * Fetch any possibly pending lock-add first, and handle it
* if it exists: * if it exists:
*/ */
if (get_user(upending, &head->list_op_pending)) if (fetch_robust_entry(&upending, &pending,
&head->list_op_pending, &pi))
return; return;
pending = compat_ptr(upending);
if (upending) if (upending)
handle_futex_death((void *)pending + futex_offset, curr); handle_futex_death((void *)pending + futex_offset, curr, pi);
while (compat_ptr(uentry) != &head->list) { while (compat_ptr(uentry) != &head->list) {
/* /*
...@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr) ...@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
*/ */
if (entry != pending) if (entry != pending)
if (handle_futex_death((void *)entry + futex_offset, if (handle_futex_death((void *)entry + futex_offset,
curr)) curr, pi))
return; return;
/* /*
* Fetch the next entry in the list: * Fetch the next entry in the list:
*/ */
if (get_user(uentry, (compat_uptr_t *)&entry->next)) if (fetch_robust_entry(&uentry, &entry,
(compat_uptr_t *)&entry->next, &pi))
return; return;
entry = compat_ptr(uentry);
/* /*
* Avoid excessively long or circular lists: * Avoid excessively long or circular lists:
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment