Commit 18163dd1 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ben Hutchings

perf: Fix event->ctx locking

commit f63a8daa upstream.

There have been a few reported issues wrt. the lack of locking around
changing event->ctx. This patch tries to address those.

It avoids the whole rwsem thing; and while it appears to work, please
give it some thought in review.

What I did fail at is sensible runtime checks on the use of
event->ctx, the RCU use makes it very hard.
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150123125834.209535886@infradead.orgSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
Signed-off-by: default avatarBen Hutchings <ben@decadent.org.uk>
parent 16c30eea
...@@ -902,6 +902,77 @@ static void put_ctx(struct perf_event_context *ctx) ...@@ -902,6 +902,77 @@ static void put_ctx(struct perf_event_context *ctx)
} }
} }
/*
* Because of perf_event::ctx migration in sys_perf_event_open::move_group and
* perf_pmu_migrate_context() we need some magic.
*
* Those places that change perf_event::ctx will hold both
* perf_event_ctx::mutex of the 'old' and 'new' ctx value.
*
* Lock ordering is by mutex address. There is one other site where
* perf_event_context::mutex nests and that is put_event(). But remember that
* that is a parent<->child context relation, and migration does not affect
* children, therefore these two orderings should not interact.
*
* The change in perf_event::ctx does not affect children (as claimed above)
* because the sys_perf_event_open() case will install a new event and break
* the ctx parent<->child relation, and perf_pmu_migrate_context() is only
* concerned with cpuctx and that doesn't have children.
*
* The places that change perf_event::ctx will issue:
*
* perf_remove_from_context();
* synchronize_rcu();
* perf_install_in_context();
*
* to affect the change. The remove_from_context() + synchronize_rcu() should
* quiesce the event, after which we can install it in the new location. This
* means that only external vectors (perf_fops, prctl) can perturb the event
* while in transit. Therefore all such accessors should also acquire
* perf_event_context::mutex to serialize against this.
*
* However; because event->ctx can change while we're waiting to acquire
* ctx->mutex we must be careful and use the below perf_event_ctx_lock()
* function.
*
* Lock order:
* task_struct::perf_event_mutex
* perf_event_context::mutex
* perf_event_context::lock
* perf_event::child_mutex;
* perf_event::mmap_mutex
* mmap_sem
*/
static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
{
struct perf_event_context *ctx;
again:
rcu_read_lock();
ctx = ACCESS_ONCE(event->ctx);
if (!atomic_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
}
rcu_read_unlock();
mutex_lock(&ctx->mutex);
if (event->ctx != ctx) {
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
goto again;
}
return ctx;
}
static void perf_event_ctx_unlock(struct perf_event *event,
struct perf_event_context *ctx)
{
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
}
/* /*
* This must be done under the ctx->lock, such as to serialize against * This must be done under the ctx->lock, such as to serialize against
* context_equiv(), therefore we cannot call put_ctx() since that might end up * context_equiv(), therefore we cannot call put_ctx() since that might end up
...@@ -1606,7 +1677,7 @@ int __perf_event_disable(void *info) ...@@ -1606,7 +1677,7 @@ int __perf_event_disable(void *info)
* is the current context on this CPU and preemption is disabled, * is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context. * hence we can't get into perf_event_task_sched_out for this context.
*/ */
void perf_event_disable(struct perf_event *event) static void _perf_event_disable(struct perf_event *event)
{ {
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task; struct task_struct *task = ctx->task;
...@@ -1647,6 +1718,19 @@ void perf_event_disable(struct perf_event *event) ...@@ -1647,6 +1718,19 @@ void perf_event_disable(struct perf_event *event)
} }
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
} }
/*
* Strictly speaking kernel users cannot create groups and therefore this
* interface does not need the perf_event_ctx_lock() magic.
*/
void perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx;
ctx = perf_event_ctx_lock(event);
_perf_event_disable(event);
perf_event_ctx_unlock(event, ctx);
}
EXPORT_SYMBOL_GPL(perf_event_disable); EXPORT_SYMBOL_GPL(perf_event_disable);
static void perf_set_shadow_time(struct perf_event *event, static void perf_set_shadow_time(struct perf_event *event,
...@@ -2107,7 +2191,7 @@ static int __perf_event_enable(void *info) ...@@ -2107,7 +2191,7 @@ static int __perf_event_enable(void *info)
* perf_event_for_each_child or perf_event_for_each as described * perf_event_for_each_child or perf_event_for_each as described
* for perf_event_disable. * for perf_event_disable.
*/ */
void perf_event_enable(struct perf_event *event) static void _perf_event_enable(struct perf_event *event)
{ {
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task; struct task_struct *task = ctx->task;
...@@ -2163,9 +2247,21 @@ void perf_event_enable(struct perf_event *event) ...@@ -2163,9 +2247,21 @@ void perf_event_enable(struct perf_event *event)
out: out:
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
} }
/*
* See perf_event_disable();
*/
void perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx;
ctx = perf_event_ctx_lock(event);
_perf_event_enable(event);
perf_event_ctx_unlock(event, ctx);
}
EXPORT_SYMBOL_GPL(perf_event_enable); EXPORT_SYMBOL_GPL(perf_event_enable);
int perf_event_refresh(struct perf_event *event, int refresh) static int _perf_event_refresh(struct perf_event *event, int refresh)
{ {
/* /*
* not supported on inherited events * not supported on inherited events
...@@ -2174,10 +2270,25 @@ int perf_event_refresh(struct perf_event *event, int refresh) ...@@ -2174,10 +2270,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
return -EINVAL; return -EINVAL;
atomic_add(refresh, &event->event_limit); atomic_add(refresh, &event->event_limit);
perf_event_enable(event); _perf_event_enable(event);
return 0; return 0;
} }
/*
* See perf_event_disable()
*/
int perf_event_refresh(struct perf_event *event, int refresh)
{
struct perf_event_context *ctx;
int ret;
ctx = perf_event_ctx_lock(event);
ret = _perf_event_refresh(event, refresh);
perf_event_ctx_unlock(event, ctx);
return ret;
}
EXPORT_SYMBOL_GPL(perf_event_refresh); EXPORT_SYMBOL_GPL(perf_event_refresh);
static void ctx_sched_out(struct perf_event_context *ctx, static void ctx_sched_out(struct perf_event_context *ctx,
...@@ -3373,7 +3484,16 @@ static void put_event(struct perf_event *event) ...@@ -3373,7 +3484,16 @@ static void put_event(struct perf_event *event)
rcu_read_unlock(); rcu_read_unlock();
if (owner) { if (owner) {
mutex_lock(&owner->perf_event_mutex); /*
* If we're here through perf_event_exit_task() we're already
* holding ctx->mutex which would be an inversion wrt. the
* normal lock order.
*
* However we can safely take this lock because its the child
* ctx->mutex.
*/
mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
/* /*
* We have to re-check the event->owner field, if it is cleared * We have to re-check the event->owner field, if it is cleared
* we raced with perf_event_exit_task(), acquiring the mutex * we raced with perf_event_exit_task(), acquiring the mutex
...@@ -3449,12 +3569,13 @@ static int perf_event_read_group(struct perf_event *event, ...@@ -3449,12 +3569,13 @@ static int perf_event_read_group(struct perf_event *event,
u64 read_format, char __user *buf) u64 read_format, char __user *buf)
{ {
struct perf_event *leader = event->group_leader, *sub; struct perf_event *leader = event->group_leader, *sub;
int n = 0, size = 0, ret = -EFAULT;
struct perf_event_context *ctx = leader->ctx; struct perf_event_context *ctx = leader->ctx;
u64 values[5]; int n = 0, size = 0, ret;
u64 count, enabled, running; u64 count, enabled, running;
u64 values[5];
lockdep_assert_held(&ctx->mutex);
mutex_lock(&ctx->mutex);
count = perf_event_read_value(leader, &enabled, &running); count = perf_event_read_value(leader, &enabled, &running);
values[n++] = 1 + leader->nr_siblings; values[n++] = 1 + leader->nr_siblings;
...@@ -3469,7 +3590,7 @@ static int perf_event_read_group(struct perf_event *event, ...@@ -3469,7 +3590,7 @@ static int perf_event_read_group(struct perf_event *event,
size = n * sizeof(u64); size = n * sizeof(u64);
if (copy_to_user(buf, values, size)) if (copy_to_user(buf, values, size))
goto unlock; return -EFAULT;
ret = size; ret = size;
...@@ -3483,14 +3604,11 @@ static int perf_event_read_group(struct perf_event *event, ...@@ -3483,14 +3604,11 @@ static int perf_event_read_group(struct perf_event *event,
size = n * sizeof(u64); size = n * sizeof(u64);
if (copy_to_user(buf + ret, values, size)) { if (copy_to_user(buf + ret, values, size)) {
ret = -EFAULT; return -EFAULT;
goto unlock;
} }
ret += size; ret += size;
} }
unlock:
mutex_unlock(&ctx->mutex);
return ret; return ret;
} }
...@@ -3549,8 +3667,14 @@ static ssize_t ...@@ -3549,8 +3667,14 @@ static ssize_t
perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{ {
struct perf_event *event = file->private_data; struct perf_event *event = file->private_data;
struct perf_event_context *ctx;
int ret;
return perf_read_hw(event, buf, count); ctx = perf_event_ctx_lock(event);
ret = perf_read_hw(event, buf, count);
perf_event_ctx_unlock(event, ctx);
return ret;
} }
static unsigned int perf_poll(struct file *file, poll_table *wait) static unsigned int perf_poll(struct file *file, poll_table *wait)
...@@ -3574,7 +3698,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) ...@@ -3574,7 +3698,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
return events; return events;
} }
static void perf_event_reset(struct perf_event *event) static void _perf_event_reset(struct perf_event *event)
{ {
(void)perf_event_read(event); (void)perf_event_read(event);
local64_set(&event->count, 0); local64_set(&event->count, 0);
...@@ -3593,6 +3717,7 @@ static void perf_event_for_each_child(struct perf_event *event, ...@@ -3593,6 +3717,7 @@ static void perf_event_for_each_child(struct perf_event *event,
struct perf_event *child; struct perf_event *child;
WARN_ON_ONCE(event->ctx->parent_ctx); WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->child_mutex); mutex_lock(&event->child_mutex);
func(event); func(event);
list_for_each_entry(child, &event->child_list, child_list) list_for_each_entry(child, &event->child_list, child_list)
...@@ -3606,14 +3731,13 @@ static void perf_event_for_each(struct perf_event *event, ...@@ -3606,14 +3731,13 @@ static void perf_event_for_each(struct perf_event *event,
struct perf_event_context *ctx = event->ctx; struct perf_event_context *ctx = event->ctx;
struct perf_event *sibling; struct perf_event *sibling;
WARN_ON_ONCE(ctx->parent_ctx); lockdep_assert_held(&ctx->mutex);
mutex_lock(&ctx->mutex);
event = event->group_leader; event = event->group_leader;
perf_event_for_each_child(event, func); perf_event_for_each_child(event, func);
list_for_each_entry(sibling, &event->sibling_list, group_entry) list_for_each_entry(sibling, &event->sibling_list, group_entry)
perf_event_for_each_child(sibling, func); perf_event_for_each_child(sibling, func);
mutex_unlock(&ctx->mutex);
} }
struct period_event { struct period_event {
...@@ -3725,25 +3849,24 @@ static int perf_event_set_output(struct perf_event *event, ...@@ -3725,25 +3849,24 @@ static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event); struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_event_set_filter(struct perf_event *event, void __user *arg);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{ {
struct perf_event *event = file->private_data;
void (*func)(struct perf_event *); void (*func)(struct perf_event *);
u32 flags = arg; u32 flags = arg;
switch (cmd) { switch (cmd) {
case PERF_EVENT_IOC_ENABLE: case PERF_EVENT_IOC_ENABLE:
func = perf_event_enable; func = _perf_event_enable;
break; break;
case PERF_EVENT_IOC_DISABLE: case PERF_EVENT_IOC_DISABLE:
func = perf_event_disable; func = _perf_event_disable;
break; break;
case PERF_EVENT_IOC_RESET: case PERF_EVENT_IOC_RESET:
func = perf_event_reset; func = _perf_event_reset;
break; break;
case PERF_EVENT_IOC_REFRESH: case PERF_EVENT_IOC_REFRESH:
return perf_event_refresh(event, arg); return _perf_event_refresh(event, arg);
case PERF_EVENT_IOC_PERIOD: case PERF_EVENT_IOC_PERIOD:
return perf_event_period(event, (u64 __user *)arg); return perf_event_period(event, (u64 __user *)arg);
...@@ -3790,6 +3913,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -3790,6 +3913,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0; return 0;
} }
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct perf_event *event = file->private_data;
struct perf_event_context *ctx;
long ret;
ctx = perf_event_ctx_lock(event);
ret = _perf_ioctl(event, cmd, arg);
perf_event_ctx_unlock(event, ctx);
return ret;
}
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
static long perf_compat_ioctl(struct file *file, unsigned int cmd, static long perf_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg) unsigned long arg)
...@@ -3812,11 +3948,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, ...@@ -3812,11 +3948,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
int perf_event_task_enable(void) int perf_event_task_enable(void)
{ {
struct perf_event_context *ctx;
struct perf_event *event; struct perf_event *event;
mutex_lock(&current->perf_event_mutex); mutex_lock(&current->perf_event_mutex);
list_for_each_entry(event, &current->perf_event_list, owner_entry) list_for_each_entry(event, &current->perf_event_list, owner_entry) {
perf_event_for_each_child(event, perf_event_enable); ctx = perf_event_ctx_lock(event);
perf_event_for_each_child(event, _perf_event_enable);
perf_event_ctx_unlock(event, ctx);
}
mutex_unlock(&current->perf_event_mutex); mutex_unlock(&current->perf_event_mutex);
return 0; return 0;
...@@ -3824,11 +3964,15 @@ int perf_event_task_enable(void) ...@@ -3824,11 +3964,15 @@ int perf_event_task_enable(void)
int perf_event_task_disable(void) int perf_event_task_disable(void)
{ {
struct perf_event_context *ctx;
struct perf_event *event; struct perf_event *event;
mutex_lock(&current->perf_event_mutex); mutex_lock(&current->perf_event_mutex);
list_for_each_entry(event, &current->perf_event_list, owner_entry) list_for_each_entry(event, &current->perf_event_list, owner_entry) {
perf_event_for_each_child(event, perf_event_disable); ctx = perf_event_ctx_lock(event);
perf_event_for_each_child(event, _perf_event_disable);
perf_event_ctx_unlock(event, ctx);
}
mutex_unlock(&current->perf_event_mutex); mutex_unlock(&current->perf_event_mutex);
return 0; return 0;
...@@ -7158,6 +7302,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) ...@@ -7158,6 +7302,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
return ret; return ret;
} }
static void mutex_lock_double(struct mutex *a, struct mutex *b)
{
if (b < a)
swap(a, b);
mutex_lock(a);
mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}
/** /**
* sys_perf_event_open - open a performance event, associate it to a task/cpu * sys_perf_event_open - open a performance event, associate it to a task/cpu
* *
...@@ -7173,7 +7326,7 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -7173,7 +7326,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event *group_leader = NULL, *output_event = NULL; struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling; struct perf_event *event, *sibling;
struct perf_event_attr attr; struct perf_event_attr attr;
struct perf_event_context *ctx; struct perf_event_context *ctx, *uninitialized_var(gctx);
struct file *event_file = NULL; struct file *event_file = NULL;
struct fd group = {NULL, 0}; struct fd group = {NULL, 0};
struct task_struct *task = NULL; struct task_struct *task = NULL;
...@@ -7369,9 +7522,14 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -7369,9 +7522,14 @@ SYSCALL_DEFINE5(perf_event_open,
} }
if (move_group) { if (move_group) {
struct perf_event_context *gctx = group_leader->ctx; gctx = group_leader->ctx;
/*
* See perf_event_ctx_lock() for comments on the details
* of swizzling perf_event::ctx.
*/
mutex_lock_double(&gctx->mutex, &ctx->mutex);
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader, false); perf_remove_from_context(group_leader, false);
/* /*
...@@ -7386,15 +7544,19 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -7386,15 +7544,19 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(sibling); perf_event__state_init(sibling);
put_ctx(gctx); put_ctx(gctx);
} }
mutex_unlock(&gctx->mutex); } else {
put_ctx(gctx); mutex_lock(&ctx->mutex);
} }
WARN_ON_ONCE(ctx->parent_ctx); WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
if (move_group) { if (move_group) {
/*
* Wait for everybody to stop referencing the events through
* the old lists, before installing it on new lists.
*/
synchronize_rcu(); synchronize_rcu();
perf_install_in_context(ctx, group_leader, group_leader->cpu); perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx); get_ctx(ctx);
list_for_each_entry(sibling, &group_leader->sibling_list, list_for_each_entry(sibling, &group_leader->sibling_list,
...@@ -7406,6 +7568,11 @@ SYSCALL_DEFINE5(perf_event_open, ...@@ -7406,6 +7568,11 @@ SYSCALL_DEFINE5(perf_event_open,
perf_install_in_context(ctx, event, event->cpu); perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx); perf_unpin_context(ctx);
if (move_group) {
mutex_unlock(&gctx->mutex);
put_ctx(gctx);
}
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
put_online_cpus(); put_online_cpus();
...@@ -7508,7 +7675,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) ...@@ -7508,7 +7675,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
mutex_lock(&src_ctx->mutex); /*
* See perf_event_ctx_lock() for comments on the details
* of swizzling perf_event::ctx.
*/
mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
list_for_each_entry_safe(event, tmp, &src_ctx->event_list, list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
event_entry) { event_entry) {
perf_remove_from_context(event, false); perf_remove_from_context(event, false);
...@@ -7516,11 +7687,9 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) ...@@ -7516,11 +7687,9 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
put_ctx(src_ctx); put_ctx(src_ctx);
list_add(&event->migrate_entry, &events); list_add(&event->migrate_entry, &events);
} }
mutex_unlock(&src_ctx->mutex);
synchronize_rcu(); synchronize_rcu();
mutex_lock(&dst_ctx->mutex);
list_for_each_entry_safe(event, tmp, &events, migrate_entry) { list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
list_del(&event->migrate_entry); list_del(&event->migrate_entry);
if (event->state >= PERF_EVENT_STATE_OFF) if (event->state >= PERF_EVENT_STATE_OFF)
...@@ -7530,6 +7699,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) ...@@ -7530,6 +7699,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
get_ctx(dst_ctx); get_ctx(dst_ctx);
} }
mutex_unlock(&dst_ctx->mutex); mutex_unlock(&dst_ctx->mutex);
mutex_unlock(&src_ctx->mutex);
} }
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment