Commit 8c930747 authored by Linus Torvalds

Merge tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix a syzbot issue for the msg ring cache added in this release. No
   ill effects from this one, but it did make KMSAN unhappy (me)

 - Sanitize the NAPI timeout handling, by unifying the value handling
   into all ktime_t rather than converting back and forth (Pavel)

 - Fail NAPI registration for IOPOLL rings, it's not supported (Pavel)

 - Fix a theoretical issue with ring polling and cancelations (Pavel)

 - Various little cleanups and fixes (Pavel)

* tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux:
  io_uring/napi: pass ktime to io_napi_adjust_timeout
  io_uring/napi: use ktime in busy polling
  io_uring/msg_ring: fix uninitialized use of target_req->flags
  io_uring: align iowq and task request error handling
  io_uring: kill REQ_F_CANCEL_SEQ
  io_uring: simplify io_uring_cmd return
  io_uring: fix io_match_task must_hold
  io_uring: don't allow netpolling with SETUP_IOPOLL
  io_uring: tighten task exit cancellations
parents bc4eee85 35816961
...@@ -404,7 +404,7 @@ struct io_ring_ctx { ...@@ -404,7 +404,7 @@ struct io_ring_ctx {
spinlock_t napi_lock; /* napi_list lock */ spinlock_t napi_lock; /* napi_list lock */
/* napi busy poll default timeout */ /* napi busy poll default timeout */
unsigned int napi_busy_poll_to; ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll; bool napi_prefer_busy_poll;
bool napi_enabled; bool napi_enabled;
...@@ -461,7 +461,6 @@ enum { ...@@ -461,7 +461,6 @@ enum {
REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
REQ_F_POLL_NO_LAZY_BIT, REQ_F_POLL_NO_LAZY_BIT,
REQ_F_CANCEL_SEQ_BIT,
REQ_F_CAN_POLL_BIT, REQ_F_CAN_POLL_BIT,
REQ_F_BL_EMPTY_BIT, REQ_F_BL_EMPTY_BIT,
REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BL_NO_RECYCLE_BIT,
...@@ -536,8 +535,6 @@ enum { ...@@ -536,8 +535,6 @@ enum {
REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
/* don't use lazy poll wake for this request */ /* don't use lazy poll wake for this request */
REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
/* cancel sequence is set and valid */
REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
/* file is pollable */ /* file is pollable */
REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
/* buffer list was empty after selection of buffer */ /* buffer list was empty after selection of buffer */
......
...@@ -1849,7 +1849,7 @@ void io_wq_submit_work(struct io_wq_work *work) ...@@ -1849,7 +1849,7 @@ void io_wq_submit_work(struct io_wq_work *work)
} while (1); } while (1);
/* avoid locking problems by failing it from a clean context */ /* avoid locking problems by failing it from a clean context */
if (ret < 0) if (ret)
io_req_task_queue_fail(req, ret); io_req_task_queue_fail(req, ret);
} }
...@@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (uts) { if (uts) {
struct timespec64 ts; struct timespec64 ts;
ktime_t dt;
if (get_timespec64(&ts, uts)) if (get_timespec64(&ts, uts))
return -EFAULT; return -EFAULT;
iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); dt = timespec64_to_ktime(ts);
io_napi_adjust_timeout(ctx, &iowq, &ts); iowq.timeout = ktime_add(dt, ktime_get());
io_napi_adjust_timeout(ctx, &iowq, dt);
} }
if (sig) { if (sig) {
...@@ -3031,8 +3033,11 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) ...@@ -3031,8 +3033,11 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
bool loop = false; bool loop = false;
io_uring_drop_tctx_refs(current); io_uring_drop_tctx_refs(current);
if (!tctx_inflight(tctx, !cancel_all))
break;
/* read completions before cancelations */ /* read completions before cancelations */
inflight = tctx_inflight(tctx, !cancel_all); inflight = tctx_inflight(tctx, false);
if (!inflight) if (!inflight)
break; break;
......
...@@ -43,7 +43,7 @@ struct io_wait_queue { ...@@ -43,7 +43,7 @@ struct io_wait_queue {
ktime_t timeout; ktime_t timeout;
#ifdef CONFIG_NET_RX_BUSY_POLL #ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_busy_poll_to; ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll; bool napi_prefer_busy_poll;
#endif #endif
}; };
......
...@@ -110,10 +110,10 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx) ...@@ -110,10 +110,10 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
if (spin_trylock(&ctx->msg_lock)) { if (spin_trylock(&ctx->msg_lock)) {
req = io_alloc_cache_get(&ctx->msg_cache); req = io_alloc_cache_get(&ctx->msg_cache);
spin_unlock(&ctx->msg_lock); spin_unlock(&ctx->msg_lock);
}
if (req) if (req)
return req; return req;
return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN); }
return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
} }
static int io_msg_data_remote(struct io_kiocb *req) static int io_msg_data_remote(struct io_kiocb *req)
......
...@@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, ...@@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
return NULL; return NULL;
} }
static inline ktime_t net_to_ktime(unsigned long t)
{
/* napi approximating usecs, reverse busy_loop_current_time */
return ns_to_ktime(t << 10);
}
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{ {
struct hlist_head *hash_list; struct hlist_head *hash_list;
...@@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) ...@@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
__io_napi_remove_stale(ctx); __io_napi_remove_stale(ctx);
} }
static inline bool io_napi_busy_loop_timeout(unsigned long start_time, static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
unsigned long bp_usec) ktime_t bp)
{ {
if (bp_usec) { if (bp) {
unsigned long end_time = start_time + bp_usec; ktime_t end_time = ktime_add(start_time, bp);
unsigned long now = busy_loop_current_time(); ktime_t now = net_to_ktime(busy_loop_current_time());
return time_after(now, end_time); return ktime_after(now, end_time);
} }
return true; return true;
...@@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data, ...@@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data,
return true; return true;
if (io_should_wake(iowq) || io_has_work(iowq->ctx)) if (io_should_wake(iowq) || io_has_work(iowq->ctx))
return true; return true;
if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
iowq->napi_busy_poll_dt))
return true; return true;
return false; return false;
...@@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, ...@@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
*/ */
void io_napi_init(struct io_ring_ctx *ctx) void io_napi_init(struct io_ring_ctx *ctx)
{ {
u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
INIT_LIST_HEAD(&ctx->napi_list); INIT_LIST_HEAD(&ctx->napi_list);
spin_lock_init(&ctx->napi_lock); spin_lock_init(&ctx->napi_lock);
ctx->napi_prefer_busy_poll = false; ctx->napi_prefer_busy_poll = false;
ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
} }
/* /*
...@@ -217,11 +226,13 @@ void io_napi_free(struct io_ring_ctx *ctx) ...@@ -217,11 +226,13 @@ void io_napi_free(struct io_ring_ctx *ctx)
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{ {
const struct io_uring_napi curr = { const struct io_uring_napi curr = {
.busy_poll_to = ctx->napi_busy_poll_to, .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll .prefer_busy_poll = ctx->napi_prefer_busy_poll
}; };
struct io_uring_napi napi; struct io_uring_napi napi;
if (ctx->flags & IORING_SETUP_IOPOLL)
return -EINVAL;
if (copy_from_user(&napi, arg, sizeof(napi))) if (copy_from_user(&napi, arg, sizeof(napi)))
return -EFAULT; return -EFAULT;
if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
...@@ -230,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) ...@@ -230,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
if (copy_to_user(arg, &curr, sizeof(curr))) if (copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT; return -EFAULT;
WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
WRITE_ONCE(ctx->napi_enabled, true); WRITE_ONCE(ctx->napi_enabled, true);
return 0; return 0;
...@@ -247,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) ...@@ -247,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{ {
const struct io_uring_napi curr = { const struct io_uring_napi curr = {
.busy_poll_to = ctx->napi_busy_poll_to, .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll .prefer_busy_poll = ctx->napi_prefer_busy_poll
}; };
if (arg && copy_to_user(arg, &curr, sizeof(curr))) if (arg && copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT; return -EFAULT;
WRITE_ONCE(ctx->napi_busy_poll_to, 0); WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
WRITE_ONCE(ctx->napi_enabled, false); WRITE_ONCE(ctx->napi_enabled, false);
return 0; return 0;
...@@ -271,25 +282,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) ...@@ -271,25 +282,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
* the NAPI timeout accordingly. * the NAPI timeout accordingly.
*/ */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
struct timespec64 *ts) ktime_t to_wait)
{ {
unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
if (ts) { if (to_wait)
struct timespec64 poll_to_ts; poll_dt = min(poll_dt, to_wait);
poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
if (timespec64_compare(ts, &poll_to_ts) < 0) {
s64 poll_to_ns = timespec64_to_ns(ts);
if (poll_to_ns > 0) {
u64 val = poll_to_ns + 999;
do_div(val, 1000);
poll_to = val;
}
}
}
iowq->napi_busy_poll_to = poll_to; iowq->napi_busy_poll_dt = poll_dt;
} }
/* /*
...@@ -318,7 +318,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) ...@@ -318,7 +318,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
LIST_HEAD(napi_list); LIST_HEAD(napi_list);
bool is_stale = false; bool is_stale = false;
if (!READ_ONCE(ctx->napi_busy_poll_to)) if (!READ_ONCE(ctx->napi_busy_poll_dt))
return 0; return 0;
if (list_empty_careful(&ctx->napi_list)) if (list_empty_careful(&ctx->napi_list))
return 0; return 0;
......
...@@ -18,7 +18,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); ...@@ -18,7 +18,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock); void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq, struct timespec64 *ts); struct io_wait_queue *iowq, ktime_t to_wait);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
...@@ -29,11 +29,11 @@ static inline bool io_napi(struct io_ring_ctx *ctx) ...@@ -29,11 +29,11 @@ static inline bool io_napi(struct io_ring_ctx *ctx)
static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq, struct io_wait_queue *iowq,
struct timespec64 *ts) ktime_t to_wait)
{ {
if (!io_napi(ctx)) if (!io_napi(ctx))
return; return;
__io_napi_adjust_timeout(ctx, iowq, ts); __io_napi_adjust_timeout(ctx, iowq, to_wait);
} }
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
...@@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) ...@@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
struct socket *sock; struct socket *sock;
if (!READ_ONCE(ctx->napi_busy_poll_to)) if (!READ_ONCE(ctx->napi_busy_poll_dt))
return; return;
sock = sock_from_file(req->file); sock = sock_from_file(req->file);
...@@ -88,7 +88,7 @@ static inline void io_napi_add(struct io_kiocb *req) ...@@ -88,7 +88,7 @@ static inline void io_napi_add(struct io_kiocb *req)
} }
static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq, struct io_wait_queue *iowq,
struct timespec64 *ts) ktime_t to_wait)
{ {
} }
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
......
...@@ -639,7 +639,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) ...@@ -639,7 +639,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
static bool io_match_task(struct io_kiocb *head, struct task_struct *task, static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
bool cancel_all) bool cancel_all)
__must_hold(&req->ctx->timeout_lock) __must_hold(&head->ctx->timeout_lock)
{ {
struct io_kiocb *req; struct io_kiocb *req;
......
...@@ -265,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) ...@@ -265,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
req_set_fail(req); req_set_fail(req);
io_req_uring_cleanup(req, issue_flags); io_req_uring_cleanup(req, issue_flags);
io_req_set_res(req, ret, 0); io_req_set_res(req, ret, 0);
return ret < 0 ? ret : IOU_OK; return IOU_OK;
} }
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment