Commit ab51fbab authored by Davidlohr Bueso's avatar Davidlohr Bueso Committed by Thomas Gleixner

futex: Fault/error injection capabilities

Although futexes are well known for being a royal pita,
we really have very few debugging capabilities - except
for relying on tglx's eye half the time.

By simply making use of the existing fault-injection machinery,
we can improve this situation, allowing the generation of artificial
uaddress faults and deadlock scenarios. Of course, when this is
disabled in production systems, the overhead for failure checks
is practically zero -- so this is very cheap at the same time.
As future work, it would be nice to enhance trinity to make use of
this.

There is a special tunable 'ignore-private', which can filter
out private futexes. Given the tsk->make_it_fail filter and
this option, pi futexes can be narrowed down pretty closely.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/1435645562-975-3-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 767f509c
...@@ -15,6 +15,10 @@ o fail_page_alloc ...@@ -15,6 +15,10 @@ o fail_page_alloc
injects page allocation failures. (alloc_pages(), get_free_pages(), ...) injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
o fail_futex
injects futex deadlock and uaddr fault errors.
o fail_make_request o fail_make_request
injects disk IO errors on devices permitted by setting injects disk IO errors on devices permitted by setting
...@@ -113,6 +117,12 @@ configuration of fault-injection capabilities. ...@@ -113,6 +117,12 @@ configuration of fault-injection capabilities.
specifies the minimum page allocation order to be injected specifies the minimum page allocation order to be injected
failures. failures.
- /sys/kernel/debug/fail_futex/ignore-private:
Format: { 'Y' | 'N' }
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
o Boot option o Boot option
In order to inject faults while debugfs is not available (early boot time), In order to inject faults while debugfs is not available (early boot time),
...@@ -121,6 +131,7 @@ use the boot option: ...@@ -121,6 +131,7 @@ use the boot option:
failslab= failslab=
fail_page_alloc= fail_page_alloc=
fail_make_request= fail_make_request=
fail_futex=
mmc_core.fail_request=<interval>,<probability>,<space>,<times> mmc_core.fail_request=<interval>,<probability>,<space>,<times>
How to add new fault injection capability How to add new fault injection capability
......
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/fault-inject.h>
#include <asm/futex.h> #include <asm/futex.h>
...@@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize; ...@@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize;
static struct futex_hash_bucket *futex_queues; static struct futex_hash_bucket *futex_queues;
/*
* Fault injections for futexes.
*/
#ifdef CONFIG_FAIL_FUTEX
static struct {
	/* Fault-injection attributes consulted by should_fail(). */
	struct fault_attr attr;

	/* Non-zero: do not inject failures for private (non-shared) futexes. */
	u32 ignore_private;
} fail_futex = {
	.attr		= FAULT_ATTR_INITIALIZER,
	.ignore_private	= 0,
};
/*
 * Handler for the "fail_futex=" boot option.
 *
 * Passes the option string to setup_fault_attr() so that it can configure
 * fail_futex.attr, and returns that helper's result unchanged.
 */
static int __init setup_fail_futex(char *str)
{
	int handled = setup_fault_attr(&fail_futex.attr, str);

	return handled;
}
__setup("fail_futex=", setup_fail_futex);
/*
 * should_fail_futex() - decide whether to inject an artificial futex failure
 * @fshared: true when the futex being operated on is shared, false when it
 *           is private
 *
 * When the 'ignore-private' tunable is set, private futexes are never
 * failed. In every other case the decision is delegated to should_fail()
 * using the fail_futex fault attributes.
 *
 * The function is static: it is only meant to be called from this file, and
 * this keeps its linkage consistent with the static inline stub that is
 * used when CONFIG_FAIL_FUTEX is disabled.
 *
 * Return: true if the caller should simulate a failure, false otherwise.
 */
static bool should_fail_futex(bool fshared)
{
	if (fail_futex.ignore_private && !fshared)
		return false;

	return should_fail(&fail_futex.attr, 1);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
/*
 * Create the debugfs interface for futex fault injection.
 *
 * A "fail_futex" directory is created through fault_create_debugfs_attr(),
 * and an "ignore-private" boolean file backed by fail_futex.ignore_private
 * is added to it. If the boolean file cannot be created, the directory is
 * removed again.
 *
 * Return: 0 on success, the error encoded in the directory pointer if the
 * directory could not be created, or -ENOMEM if the boolean file could not
 * be created.
 */
static int __init fail_futex_debugfs(void)
{
	const umode_t file_mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *futex_dir;
	struct dentry *entry;

	futex_dir = fault_create_debugfs_attr("fail_futex", NULL,
					      &fail_futex.attr);
	if (IS_ERR(futex_dir))
		return PTR_ERR(futex_dir);

	entry = debugfs_create_bool("ignore-private", file_mode, futex_dir,
				    &fail_futex.ignore_private);
	if (entry)
		return 0;

	/* Do not leave a half-populated directory behind. */
	debugfs_remove_recursive(futex_dir);
	return -ENOMEM;
}
late_initcall(fail_futex_debugfs);
#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
#else
/*
 * Stub used when CONFIG_FAIL_FUTEX is not enabled: no failure is ever
 * injected, regardless of whether the futex is shared or private.
 */
static inline bool should_fail_futex(bool fshared)
{
	(void)fshared;	/* argument intentionally unused */

	return false;
}
#endif /* CONFIG_FAIL_FUTEX */
static inline void futex_get_mm(union futex_key *key) static inline void futex_get_mm(union futex_key *key)
{ {
atomic_inc(&key->private.mm->mm_count); atomic_inc(&key->private.mm->mm_count);
...@@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) ...@@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
return -EFAULT; return -EFAULT;
if (unlikely(should_fail_futex(fshared)))
return -EFAULT;
/* /*
* PROCESS_PRIVATE futexes are fast. * PROCESS_PRIVATE futexes are fast.
* As the mm cannot disappear under us and the 'key' only needs * As the mm cannot disappear under us and the 'key' only needs
...@@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) ...@@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
} }
again: again:
/* Ignore any VERIFY_READ mapping (futex common case) */
if (unlikely(should_fail_futex(fshared)))
return -EFAULT;
err = get_user_pages_fast(address, 1, 1, &page); err = get_user_pages_fast(address, 1, 1, &page);
/* /*
* If write access is not required (eg. FUTEX_WAIT), try * If write access is not required (eg. FUTEX_WAIT), try
...@@ -516,7 +584,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) ...@@ -516,7 +584,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
* A RO anonymous page will never change and thus doesn't make * A RO anonymous page will never change and thus doesn't make
* sense for futex operations. * sense for futex operations.
*/ */
if (ro) { if (unlikely(should_fail_futex(fshared)) || ro) {
err = -EFAULT; err = -EFAULT;
goto out; goto out;
} }
...@@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) ...@@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
{ {
u32 uninitialized_var(curval); u32 uninitialized_var(curval);
if (unlikely(should_fail_futex(true)))
return -EFAULT;
if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT; return -EFAULT;
...@@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ...@@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
if (get_futex_value_locked(&uval, uaddr)) if (get_futex_value_locked(&uval, uaddr))
return -EFAULT; return -EFAULT;
if (unlikely(should_fail_futex(true)))
return -EFAULT;
/* /*
* Detect deadlocks. * Detect deadlocks.
*/ */
if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
return -EDEADLK; return -EDEADLK;
if ((unlikely(should_fail_futex(true))))
return -EDEADLK;
/* /*
* Lookup existing state first. If it exists, try to attach to * Lookup existing state first. If it exists, try to attach to
* its pi_state. * its pi_state.
...@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, ...@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
*/ */
newval = FUTEX_WAITERS | task_pid_vnr(new_owner); newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
if (unlikely(should_fail_futex(true)))
ret = -EFAULT;
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
ret = -EFAULT; ret = -EFAULT;
else if (curval != uval) else if (curval != uval)
...@@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, ...@@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
if (get_futex_value_locked(&curval, pifutex)) if (get_futex_value_locked(&curval, pifutex))
return -EFAULT; return -EFAULT;
if (unlikely(should_fail_futex(true)))
return -EFAULT;
/* /*
* Find the top_waiter and determine if there are additional waiters. * Find the top_waiter and determine if there are additional waiters.
* If the caller intends to requeue more than 1 waiter to pifutex, * If the caller intends to requeue more than 1 waiter to pifutex,
...@@ -2537,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, ...@@ -2537,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
* @uaddr: the futex we initially wait on (non-pi) * @uaddr: the futex we initially wait on (non-pi)
* @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
* the same type, no requeueing from private to shared, etc. * the same type, no requeueing from private to shared, etc.
* @val: the expected value of uaddr * @val: the expected value of uaddr
* @abs_time: absolute timeout * @abs_time: absolute timeout
* @bitset: 32 bit wakeup bitset set by userspace, defaults to all * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
...@@ -3012,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, ...@@ -3012,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_BITSET ||
cmd == FUTEX_WAIT_REQUEUE_PI)) { cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
return -EFAULT;
if (copy_from_user(&ts, utime, sizeof(ts)) != 0) if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT; return -EFAULT;
if (!timespec_valid(&ts)) if (!timespec_valid(&ts))
......
...@@ -1542,6 +1542,13 @@ config FAIL_MMC_REQUEST ...@@ -1542,6 +1542,13 @@ config FAIL_MMC_REQUEST
and to test how the mmc host driver handles retries from and to test how the mmc host driver handles retries from
the block device. the block device.
config FAIL_FUTEX
bool "Fault-injection capability for futexes"
select DEBUG_FS
depends on FAULT_INJECTION && FUTEX
help
Provide fault-injection capability for futexes.
config FAULT_INJECTION_DEBUG_FS config FAULT_INJECTION_DEBUG_FS
bool "Debugfs entries for fault-injection capabilities" bool "Debugfs entries for fault-injection capabilities"
depends on FAULT_INJECTION && SYSFS && DEBUG_FS depends on FAULT_INJECTION && SYSFS && DEBUG_FS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment