Commit fa5fca78 authored by Linus Torvalds

Merge tag 'io_uring-5.10-2020-11-20' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Mostly regression or stable fodder:

   - Disallow async path resolution of /proc/self

   - Tighten constraints for segmented async buffered reads

   - Fix double completion for a retry error case

   - Fix for fixed file life times (Pavel)"

* tag 'io_uring-5.10-2020-11-20' of git://git.kernel.dk/linux-block:
  io_uring: order refnode recycling
  io_uring: get an active ref_node from files_data
  io_uring: don't double complete failed reissue request
  mm: never attempt async page lock if we've transferred data already
  io_uring: handle -EOPNOTSUPP on path resolution
  proc: don't allow async path resolution of /proc/self components
parents 4ccf7a01 e297822b
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -205,6 +205,7 @@ struct fixed_file_ref_node {
 	struct list_head	file_list;
 	struct fixed_file_data	*file_data;
 	struct llist_node	llist;
+	bool			done;
 };
 
 struct fixed_file_data {
@@ -478,6 +479,7 @@ struct io_sr_msg {
 struct io_open {
 	struct file		*file;
 	int			dfd;
+	bool			ignore_nonblock;
 	struct filename		*filename;
 	struct open_how		how;
 	unsigned long		nofile;
@@ -2577,7 +2579,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 	}
 end_req:
 	req_set_fail_links(req);
-	io_req_complete(req, ret);
 	return false;
 }
 #endif
@@ -3795,6 +3796,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 		return ret;
 	}
 	req->open.nofile = rlimit(RLIMIT_NOFILE);
+	req->open.ignore_nonblock = false;
 	req->flags |= REQ_F_NEED_CLEANUP;
 	return 0;
 }
@@ -3838,7 +3840,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
 	struct file *file;
 	int ret;
 
-	if (force_nonblock)
+	if (force_nonblock && !req->open.ignore_nonblock)
 		return -EAGAIN;
 
 	ret = build_open_flags(&req->open.how, &op);
@@ -3853,6 +3855,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
 	if (IS_ERR(file)) {
 		put_unused_fd(ret);
 		ret = PTR_ERR(file);
+		/*
+		 * A work-around to ensure that /proc/self works that way
+		 * that it should - if we get -EOPNOTSUPP back, then assume
+		 * that proc_self_get_link() failed us because we're in async
+		 * context. We should be safe to retry this from the task
+		 * itself with force_nonblock == false set, as it should not
+		 * block on lookup. Would be nice to know this upfront and
+		 * avoid the async dance, but doesn't seem feasible.
+		 */
+		if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
+			req->open.ignore_nonblock = true;
+			refcount_inc(&req->refs);
+			io_req_task_queue(req);
+			return 0;
+		}
 	} else {
 		fsnotify_open(file);
 		fd_install(ret, file);
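A note on the retry flow in the hunk above: stripped of the io_uring details it reduces to a small state machine. The sketch below is illustrative only; the helper names (do_blocking_open(), in_async_worker(), queue_in_task()) and the open_req fields are made up stand-ins for the kernel machinery, not the actual implementation.

#include <errno.h>
#include <stdbool.h>

struct open_req {
        bool ignore_nonblock;           /* run blocking from task context next time */
        bool queued_for_task;           /* stand-in for io_req_task_queue() */
};

/* Hypothetical stand-ins for kernel machinery, illustration only. */
static bool in_async_worker(void) { return true; }
static void queue_in_task(struct open_req *req) { req->queued_for_task = true; }
static int do_blocking_open(struct open_req *req)
{
        (void)req;
        return -EOPNOTSUPP;             /* pretend the fs refused async resolution */
}

static int try_open(struct open_req *req, bool force_nonblock)
{
        int ret;

        if (force_nonblock && !req->ignore_nonblock)
                return -EAGAIN;                 /* punt to an async worker */

        ret = do_blocking_open(req);
        if (ret == -EOPNOTSUPP && in_async_worker()) {
                req->ignore_nonblock = true;    /* skip the punt on the retry */
                queue_in_task(req);             /* re-drive from the task itself */
                return 0;                       /* result arrives with that retry */
        }
        return ret;
}

int main(void)
{
        struct open_req req = { 0 };
        int ret;

        ret = try_open(&req, true);     /* inline attempt punts with -EAGAIN */
        ret = try_open(&req, false);    /* "worker" hits -EOPNOTSUPP, re-queues */
        return (ret == 0 && req.queued_for_task) ? 0 : 1;
}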
@@ -6957,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 		return -ENXIO;
 
 	spin_lock(&data->lock);
-	if (!list_empty(&data->ref_list))
-		ref_node = list_first_entry(&data->ref_list,
-				struct fixed_file_ref_node, node);
+	ref_node = data->node;
 	spin_unlock(&data->lock);
 	if (ref_node)
 		percpu_ref_kill(&ref_node->refs);
@@ -7308,10 +7323,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
 		kfree(pfile);
 	}
 
-	spin_lock(&file_data->lock);
-	list_del(&ref_node->node);
-	spin_unlock(&file_data->lock);
-
 	percpu_ref_exit(&ref_node->refs);
 	kfree(ref_node);
 	percpu_ref_put(&file_data->refs);
@@ -7338,17 +7349,32 @@ static void io_file_put_work(struct work_struct *work)
 static void io_file_data_ref_zero(struct percpu_ref *ref)
 {
 	struct fixed_file_ref_node *ref_node;
+	struct fixed_file_data *data;
 	struct io_ring_ctx *ctx;
-	bool first_add;
+	bool first_add = false;
 	int delay = HZ;
 
 	ref_node = container_of(ref, struct fixed_file_ref_node, refs);
-	ctx = ref_node->file_data->ctx;
+	data = ref_node->file_data;
+	ctx = data->ctx;
+
+	spin_lock(&data->lock);
+	ref_node->done = true;
+
+	while (!list_empty(&data->ref_list)) {
+		ref_node = list_first_entry(&data->ref_list,
+					struct fixed_file_ref_node, node);
+		/* recycle ref nodes in order */
+		if (!ref_node->done)
+			break;
+		list_del(&ref_node->node);
+		first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
+	}
+	spin_unlock(&data->lock);
 
-	if (percpu_ref_is_dying(&ctx->file_data->refs))
+	if (percpu_ref_is_dying(&data->refs))
 		delay = 0;
 
-	first_add = llist_add(&ref_node->llist, &ctx->file_put_llist);
 	if (!delay)
 		mod_delayed_work(system_wq, &ctx->file_put_work, 0);
 	else if (first_add)
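Why the ordering in the hunk above matters: a ref node whose references drop to zero out of order must still wait behind the older nodes ahead of it, otherwise resources could be recycled while an older node is still in flight. A minimal userspace sketch of that "mark done, drain from the head in order" idea, using simplified stand-in types (struct ref_node, mark_done_and_recycle()) rather than the io_uring structures:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-in for a ref node: nodes sit in install order and may be
 * marked done in any order. */
struct ref_node {
        bool done;
        struct ref_node *next;
};

/* Mark 'node' done, then recycle strictly from the head of the list while the
 * head is done: a node finishing out of order waits for the older ones. */
static void mark_done_and_recycle(struct ref_node **head, struct ref_node *node)
{
        node->done = true;
        while (*head && (*head)->done) {
                struct ref_node *oldest = *head;

                *head = oldest->next;
                printf("recycling %p\n", (void *)oldest);       /* free/reuse here */
        }
}

int main(void)
{
        struct ref_node a = { .done = false, .next = NULL };
        struct ref_node b = { .done = false, .next = NULL };
        struct ref_node *head = &a;

        a.next = &b;
        mark_done_and_recycle(&head, &b);       /* newer node first: nothing recycled yet */
        mark_done_and_recycle(&head, &a);       /* oldest done: a, then b, recycled in order */
        return 0;
}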
...@@ -7372,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( ...@@ -7372,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->node);
INIT_LIST_HEAD(&ref_node->file_list); INIT_LIST_HEAD(&ref_node->file_list);
ref_node->file_data = ctx->file_data; ref_node->file_data = ctx->file_data;
ref_node->done = false;
return ref_node; return ref_node;
} }
...@@ -7467,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, ...@@ -7467,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
file_data->node = ref_node; file_data->node = ref_node;
spin_lock(&file_data->lock); spin_lock(&file_data->lock);
list_add(&ref_node->node, &file_data->ref_list); list_add_tail(&ref_node->node, &file_data->ref_list);
spin_unlock(&file_data->lock); spin_unlock(&file_data->lock);
percpu_ref_get(&file_data->refs); percpu_ref_get(&file_data->refs);
return ret; return ret;
...@@ -7626,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, ...@@ -7626,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) { if (needs_switch) {
percpu_ref_kill(&data->node->refs); percpu_ref_kill(&data->node->refs);
spin_lock(&data->lock); spin_lock(&data->lock);
list_add(&ref_node->node, &data->ref_list); list_add_tail(&ref_node->node, &data->ref_list);
data->node = ref_node; data->node = ref_node;
spin_unlock(&data->lock); spin_unlock(&data->lock);
percpu_ref_get(&ctx->file_data->refs); percpu_ref_get(&ctx->file_data->refs);
......
@@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry,
 	pid_t tgid = task_tgid_nr_ns(current, ns);
 	char *name;
 
+	/*
+	 * Not currently supported. Once we can inherit all of struct pid,
+	 * we can allow this.
+	 */
+	if (current->flags & PF_KTHREAD)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	if (!tgid)
 		return ERR_PTR(-ENOENT);
 	/* max length of unsigned int in decimal + NULL term */
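Seen from userspace, the path these two commits close off is simply an io_uring open of something under /proc/self. A minimal liburing example that exercises it (assuming liburing is installed; error handling trimmed); with these fixes the lookup is retried from the submitting task rather than resolved from an io-wq worker thread:

#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_sqe *sqe;
        struct io_uring_cqe *cqe;

        if (io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        /* Ask the kernel to open /proc/self/status asynchronously. */
        sqe = io_uring_get_sqe(&ring);
        io_uring_prep_openat(sqe, AT_FDCWD, "/proc/self/status", O_RDONLY, 0);
        io_uring_submit(&ring);

        io_uring_wait_cqe(&ring, &cqe);
        /* cqe->res is the new fd on success, or a negative errno. */
        printf("openat(/proc/self/status) -> %d\n", cqe->res);
        if (cqe->res >= 0)
                close(cqe->res);
        io_uring_cqe_seen(&ring, cqe);

        io_uring_queue_exit(&ring);
        return 0;
}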

--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2347,10 +2347,15 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		if (iocb->ki_flags & IOCB_WAITQ)
+		if (iocb->ki_flags & IOCB_WAITQ) {
+			if (written) {
+				put_page(page);
+				goto out;
+			}
 			error = lock_page_async(page, iocb->ki_waitq);
-		else
+		} else {
 			error = lock_page_killable(page);
+		}
 		if (unlikely(error))
 			goto readpage_error;
 
@@ -2393,10 +2398,15 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		}
 
 		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_WAITQ)
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
 				error = lock_page_async(page, iocb->ki_waitq);
-			else
+			} else {
 				error = lock_page_killable(page);
+			}
 
 			if (unlikely(error))
 				goto readpage_error;
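The net effect of the filemap change is that an async buffered read which has already copied some bytes completes with a short count instead of waiting on the page lock again; callers simply observe an ordinary short read. For illustration, a generic (non-kernel) helper named read_full() showing the usual way such partial reads are resumed:

#include <errno.h>
#include <stddef.h>
#include <unistd.h>

/* Resume short reads (including partial completions like the above) until the
 * requested length, EOF, or a real error. */
static ssize_t read_full(int fd, void *buf, size_t len)
{
        size_t done = 0;

        while (done < len) {
                ssize_t n = read(fd, (char *)buf + done, len - done);

                if (n == 0)                     /* EOF */
                        break;
                if (n < 0) {
                        if (errno == EINTR)
                                continue;       /* interrupted: just retry */
                        return -1;
                }
                done += (size_t)n;
        }
        return (ssize_t)done;
}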