Commit fed8f509 authored by Linus Torvalds

Merge branch 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc

Pull percpu_ref rcu fixes from Tejun Heo:
 "Jann Horn found that aio was depending on the internal RCU grace
  periods of percpu-ref and that it's broken because aio uses regular
  RCU while percpu_ref uses sched-RCU.

  Depending on percpu_ref's internal grace periods isn't a good idea
  because

   - The RCU type might not match.

   - percpu_ref's grace periods are used to switch to atomic mode. They
     aren't between the last put and the invocation of the last release.
     This is easy to get confused about and can lead to subtle bugs.

   - percpu_ref might not have grace periods at all depending on its
     current operation mode.

  This patchset audits and fixes percpu_ref users for their RCU usages"
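
  To make the failure mode concrete, here is a minimal kernel-C sketch of
  the anti-pattern versus the fix; the "foo" structure and function names
  are illustrative only and do not come from the patchset:

#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct percpu_ref ref;
	struct rcu_head rcu;
};

/* Reader side: finds @foo under a regular-RCU read lock and pins it. */
static struct foo *foo_lookup(struct foo __rcu **slot)
{
	struct foo *f;

	rcu_read_lock();
	f = rcu_dereference(*slot);
	if (f && !percpu_ref_tryget(&f->ref))
		f = NULL;
	rcu_read_unlock();
	return f;
}

/* BROKEN: relies on percpu_ref_kill()'s internal grace period, which is
 * sched-RCU rather than the regular RCU used above, and which does not
 * separate the last put from the release at all once the ref operates
 * in atomic mode. */
static void foo_release_broken(struct percpu_ref *ref)
{
	struct foo *f = container_of(ref, struct foo, ref);

	percpu_ref_exit(&f->ref);
	kfree(f);
}

/* FIXED: insert an explicit regular-RCU grace period before freeing. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu);

	percpu_ref_exit(&f->ref);
	kfree(f);
}

static void foo_release_fixed(struct percpu_ref *ref)
{
	struct foo *f = container_of(ref, struct foo, ref);

	call_rcu(&f->rcu, foo_free_rcu);
}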

[ There's a continuation of this series that clarifies percpu_ref
  documentation that the internal grace periods must not be depended
  upon, and introduces rcu_work to simplify bouncing to a workqueue
  after an RCU grace period.

  That will go in for 4.17 - this is just the minimal set with the fixes
  that are tagged for -stable ]
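
As a preview of that simplification, rcu_work bundles the rcu_head and
work_struct so the double bounce becomes a single call; a sketch of the
pattern as it later landed (the "bar" names are illustrative only):

#include <linux/workqueue.h>
#include <linux/slab.h>

struct bar {
	struct rcu_work free_rwork;	/* rcu_head + work_struct in one */
};

static void bar_free_workfn(struct work_struct *work)
{
	struct bar *b = container_of(to_rcu_work(work), struct bar,
				     free_rwork);

	/* Runs in process context, after a regular-RCU grace period. */
	kfree(b);
}

static void bar_free(struct bar *b)
{
	/* Replaces the manual call_rcu() -> INIT_WORK() + schedule_work(). */
	INIT_RCU_WORK(&b->free_rwork, bar_free_workfn);
	queue_rcu_work(system_wq, &b->free_rwork);
}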

* 'percpu_ref-rcu-audit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc:
  RDMAVT: Fix synchronization around percpu_ref
  fs/aio: Use RCU accessors for kioctx_table->table[]
  fs/aio: Add explicit RCU grace period when freeing kioctx
parents 3e04040d 74b44bbe
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -489,11 +489,13 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
 	unsigned long timeout;
 	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
 
-	if (percpu_ref_is_zero(&mr->refcount))
-		return 0;
-	/* avoid dma mr */
-	if (mr->lkey)
+	if (mr->lkey) {
+		/* avoid dma mr */
 		rvt_dereg_clean_qps(mr);
+		/* @mr was indexed on rcu protected @lkey_table */
+		synchronize_rcu();
+	}
+
 	timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
 	if (!timeout) {
 		rvt_pr_err(rdi,
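
The synchronize_rcu() added above pairs with the RCU read-side lookup of the
lkey table. A simplified, illustrative version of that reader follows; the
real rvt_lkey_ok() takes more parameters and performs additional validation:

static struct rvt_mregion *lkey_lookup(struct rvt_lkey_table *rkt, u32 idx)
{
	struct rvt_mregion *mr;

	rcu_read_lock();
	mr = rcu_dereference(rkt->table[idx]);
	if (mr)
		rvt_get_mr(mr);	/* bumps mr->refcount, a percpu_ref */
	rcu_read_unlock();

	/*
	 * Once @mr has been removed from the table and synchronize_rcu()
	 * returns, no lookup like this can still be in flight, so waiting
	 * on mr->comp for the refcount to drain is safe.
	 */
	return mr;
}
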
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -70,7 +70,7 @@ struct aio_ring {
 struct kioctx_table {
-	struct rcu_head	rcu;
-	unsigned	nr;
-	struct kioctx	*table[];
+	struct rcu_head		rcu;
+	unsigned		nr;
+	struct kioctx __rcu	*table[];
 };
 
 struct kioctx_cpu {
@@ -115,7 +115,8 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
-	struct work_struct	free_work;
+	struct rcu_head		free_rcu;
+	struct work_struct	free_work;	/* see free_ioctx() */
 
 	/*
 	 * signals when all in-flight requests are done
@@ -329,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
 	for (i = 0; i < table->nr; i++) {
 		struct kioctx *ctx;
 
-		ctx = table->table[i];
+		ctx = rcu_dereference(table->table[i]);
 		if (ctx && ctx->aio_ring_file == file) {
 			if (!atomic_read(&ctx->dead)) {
 				ctx->user_id = ctx->mmap_base = vma->vm_start;
@@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb)
 	return cancel(&kiocb->common);
 }
 
+/*
+ * free_ioctx() should be RCU delayed to synchronize against the RCU
+ * protected lookup_ioctx() and also needs process context to call
+ * aio_free_ring(), so the double bouncing through kioctx->free_rcu and
+ * ->free_work.
+ */
 static void free_ioctx(struct work_struct *work)
 {
 	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
@@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work)
 	kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_rcufn(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, free_rcu);
+
+	INIT_WORK(&ctx->free_work, free_ioctx);
+	schedule_work(&ctx->free_work);
+}
+
 static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
@@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
 		complete(&ctx->rq_wait->comp);
 
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	/* Synchronize against RCU protected table->table[] dereferences */
+	call_rcu(&ctx->free_rcu, free_ioctx_rcufn);
 }
 
 /*
@@ -651,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 	while (1) {
 		if (table)
 			for (i = 0; i < table->nr; i++)
-				if (!table->table[i]) {
+				if (!rcu_access_pointer(table->table[i])) {
 					ctx->id = i;
-					table->table[i] = ctx;
+					rcu_assign_pointer(table->table[i], ctx);
 					spin_unlock(&mm->ioctx_lock);
 
 					/* While kioctx setup is in progress,
@@ -834,11 +849,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	}
 
 	table = rcu_dereference_raw(mm->ioctx_table);
-	WARN_ON(ctx != table->table[ctx->id]);
-	table->table[ctx->id] = NULL;
+	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
+	RCU_INIT_POINTER(table->table[ctx->id], NULL);
 	spin_unlock(&mm->ioctx_lock);
 
-	/* percpu_ref_kill() will do the necessary call_rcu() */
+	/* free_ioctx_reqs() will do the necessary RCU synchronization */
 	wake_up_all(&ctx->wait);
 
 	/*
@@ -880,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
 		skipped = 0;
 		for (i = 0; i < table->nr; ++i) {
-			struct kioctx *ctx = table->table[i];
+			struct kioctx *ctx =
+				rcu_dereference_protected(table->table[i], true);
 
 			if (!ctx) {
 				skipped++;
 				continue;
@@ -1069,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	if (!table || id >= table->nr)
 		goto out;
 
-	ctx = table->table[id];
+	ctx = rcu_dereference(table->table[id]);
 	if (ctx && ctx->user_id == ctx_id) {
 		percpu_ref_get(&ctx->users);
 		ret = ctx;