Commit 3a38bb98 authored by Yonghong Song's avatar Yonghong Song Committed by Daniel Borkmann

bpf/tracing: fix a deadlock in perf_event_detach_bpf_prog

syzbot reported a possible deadlock in perf_event_detach_bpf_prog.
The error details:
  ======================================================
  WARNING: possible circular locking dependency detected
  4.16.0-rc7+ #3 Not tainted
  ------------------------------------------------------
  syz-executor7/24531 is trying to acquire lock:
   (bpf_event_mutex){+.+.}, at: [<000000008a849b07>] perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854

  but task is already holding lock:
   (&mm->mmap_sem){++++}, at: [<0000000038768f87>] vm_mmap_pgoff+0x198/0x280 mm/util.c:353

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&mm->mmap_sem){++++}:
       __might_fault+0x13a/0x1d0 mm/memory.c:4571
       _copy_to_user+0x2c/0xc0 lib/usercopy.c:25
       copy_to_user include/linux/uaccess.h:155 [inline]
       bpf_prog_array_copy_info+0xf2/0x1c0 kernel/bpf/core.c:1694
       perf_event_query_prog_array+0x1c7/0x2c0 kernel/trace/bpf_trace.c:891
       _perf_ioctl kernel/events/core.c:4750 [inline]
       perf_ioctl+0x3e1/0x1480 kernel/events/core.c:4770
       vfs_ioctl fs/ioctl.c:46 [inline]
       do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
       SYSC_ioctl fs/ioctl.c:701 [inline]
       SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

  -> #0 (bpf_event_mutex){+.+.}:
       lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3920
       __mutex_lock_common kernel/locking/mutex.c:756 [inline]
       __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893
       mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
       perf_event_detach_bpf_prog+0x92/0x3d0 kernel/trace/bpf_trace.c:854
       perf_event_free_bpf_prog kernel/events/core.c:8147 [inline]
       _free_event+0xbdb/0x10f0 kernel/events/core.c:4116
       put_event+0x24/0x30 kernel/events/core.c:4204
       perf_mmap_close+0x60d/0x1010 kernel/events/core.c:5172
       remove_vma+0xb4/0x1b0 mm/mmap.c:172
       remove_vma_list mm/mmap.c:2490 [inline]
       do_munmap+0x82a/0xdf0 mm/mmap.c:2731
       mmap_region+0x59e/0x15a0 mm/mmap.c:1646
       do_mmap+0x6c0/0xe00 mm/mmap.c:1483
       do_mmap_pgoff include/linux/mm.h:2223 [inline]
       vm_mmap_pgoff+0x1de/0x280 mm/util.c:355
       SYSC_mmap_pgoff mm/mmap.c:1533 [inline]
       SyS_mmap_pgoff+0x462/0x5f0 mm/mmap.c:1491
       SYSC_mmap arch/x86/kernel/sys_x86_64.c:100 [inline]
       SyS_mmap+0x16/0x20 arch/x86/kernel/sys_x86_64.c:91
       do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
       entry_SYSCALL_64_after_hwframe+0x42/0xb7

  other info that might help us debug this:

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&mm->mmap_sem);
                                 lock(bpf_event_mutex);
                                 lock(&mm->mmap_sem);
    lock(bpf_event_mutex);

   *** DEADLOCK ***
  ======================================================

The bug is introduced by Commit f371b304 ("bpf/tracing: allow
user space to query prog array on the same tp") where copy_to_user,
which requires mm->mmap_sem, is called inside bpf_event_mutex lock.
At the same time, during perf_event file descriptor close,
mm->mmap_sem is held first and then subsequent
perf_event_detach_bpf_prog needs bpf_event_mutex lock.
Such a senario caused a deadlock.

As suggested by Daniel, moving copy_to_user out of the
bpf_event_mutex lock should fix the problem.

Fixes: f371b304 ("bpf/tracing: allow user space to query prog array on the same tp")
Reported-by: syzbot+dc5ca0e4c9bfafaf2bae@syzkaller.appspotmail.com
Signed-off-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parent 0abf854d
...@@ -339,8 +339,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, ...@@ -339,8 +339,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
struct bpf_prog *old_prog); struct bpf_prog *old_prog);
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 request_cnt, u32 *prog_ids, u32 request_cnt,
__u32 __user *prog_cnt); u32 *prog_cnt);
int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog *exclude_prog, struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog, struct bpf_prog *include_prog,
......
...@@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) ...@@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
return cnt; return cnt;
} }
static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
u32 *prog_ids,
u32 request_cnt)
{
int i = 0;
for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
continue;
prog_ids[i] = (*prog)->aux->id;
if (++i == request_cnt) {
prog++;
break;
}
}
return !!(*prog);
}
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
__u32 __user *prog_ids, u32 cnt) __u32 __user *prog_ids, u32 cnt)
{ {
struct bpf_prog **prog; struct bpf_prog **prog;
unsigned long err = 0; unsigned long err = 0;
u32 i = 0, *ids;
bool nospc; bool nospc;
u32 *ids;
/* users of this function are doing: /* users of this function are doing:
* cnt = bpf_prog_array_length(); * cnt = bpf_prog_array_length();
...@@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, ...@@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return -ENOMEM; return -ENOMEM;
rcu_read_lock(); rcu_read_lock();
prog = rcu_dereference(progs)->progs; prog = rcu_dereference(progs)->progs;
for (; *prog; prog++) { nospc = bpf_prog_array_copy_core(prog, ids, cnt);
if (*prog == &dummy_bpf_prog.prog)
continue;
ids[i] = (*prog)->aux->id;
if (++i == cnt) {
prog++;
break;
}
}
nospc = !!(*prog);
rcu_read_unlock(); rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids); kfree(ids);
...@@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, ...@@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
} }
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 request_cnt, u32 *prog_ids, u32 request_cnt,
__u32 __user *prog_cnt) u32 *prog_cnt)
{ {
struct bpf_prog **prog;
u32 cnt = 0; u32 cnt = 0;
if (array) if (array)
cnt = bpf_prog_array_length(array); cnt = bpf_prog_array_length(array);
if (copy_to_user(prog_cnt, &cnt, sizeof(cnt))) *prog_cnt = cnt;
return -EFAULT;
/* return early if user requested only program count or nothing to copy */ /* return early if user requested only program count or nothing to copy */
if (!request_cnt || !cnt) if (!request_cnt || !cnt)
return 0; return 0;
return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt); /* this function is called under trace/bpf_trace.c: bpf_event_mutex */
prog = rcu_dereference_check(array, 1)->progs;
return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
: 0;
} }
static void bpf_prog_free_deferred(struct work_struct *work) static void bpf_prog_free_deferred(struct work_struct *work)
......
...@@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) ...@@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{ {
struct perf_event_query_bpf __user *uquery = info; struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {}; struct perf_event_query_bpf query = {};
u32 *ids, prog_cnt, ids_len;
int ret; int ret;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
...@@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) ...@@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return -EINVAL; return -EINVAL;
if (copy_from_user(&query, uquery, sizeof(query))) if (copy_from_user(&query, uquery, sizeof(query)))
return -EFAULT; return -EFAULT;
if (query.ids_len > BPF_TRACE_MAX_PROGS)
ids_len = query.ids_len;
if (ids_len > BPF_TRACE_MAX_PROGS)
return -E2BIG; return -E2BIG;
ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
/*
* The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
* is required when user only wants to check for uquery->prog_cnt.
* There is no need to check for it since the case is handled
* gracefully in bpf_prog_array_copy_info.
*/
mutex_lock(&bpf_event_mutex); mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array, ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
uquery->ids, ids,
query.ids_len, ids_len,
&uquery->prog_cnt); &prog_cnt);
mutex_unlock(&bpf_event_mutex); mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
ret = -EFAULT;
kfree(ids);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment