Commit 4e6b2b2e authored by Linus Torvalds

Merge tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:
 "Nothing major, just a few minor tweaks:

   - Tweak for the TCP zero-copy io_uring self test (Pavel)

   - Rather than use our internal cached value of number of CQ events
     available, use what the user can see (Dylan)

   - Fix a typo in a comment, added in this release (me)

   - Don't allow wrapping while adding provided buffers (me)

   - Fix a double poll race, and add a lockdep assertion for it too
     (Pavel)"

* tag 'io_uring-6.1-2022-11-11' of git://git.kernel.dk/linux:
  io_uring/poll: lockdep annote io_poll_req_insert_locked
  io_uring/poll: fix double poll req->flags races
  io_uring: check for rollover of buffer ID when providing buffers
  io_uring: calculate CQEs from the user visible value
  io_uring: fix typo in io_uring.h comment
  selftests/net: don't tests batched TCP io_uring zc
parents f5020a08 5576035f
@@ -222,7 +222,7 @@ enum io_uring_op {
 /*
  * sqe->uring_cmd_flags
- * IORING_URING_CMD_FIXED	use registered buffer; pass thig flag
+ * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
@@ -176,6 +176,11 @@ static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx)
 	return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head);
 }
 
+static inline unsigned int __io_cqring_events_user(struct io_ring_ctx *ctx)
+{
+	return READ_ONCE(ctx->rings->cq.tail) - READ_ONCE(ctx->rings->cq.head);
+}
+
 static bool io_match_linked(struct io_kiocb *head)
 {
 	struct io_kiocb *req;
@@ -2315,7 +2320,7 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
 static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
-	int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+	int dist = READ_ONCE(ctx->rings->cq.tail) - (int) iowq->cq_tail;
 
 	/*
 	 * Wake up if we have enough events, or if a timeout occurred since we
@@ -2399,7 +2404,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 		io_cqring_overflow_flush(ctx);
 
-		if (io_cqring_events(ctx) >= min_events)
+		/* if user messes with these they will just get an early return */
+		if (__io_cqring_events_user(ctx) >= min_events)
 			return 0;
 	} while (ret > 0);
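For context on the CQE-counting change: from userspace, the number of ready completions is exactly the distance between the shared ring's cq.tail and cq.head, which is what liburing's io_uring_cq_ready() computes. A minimal sketch, assuming liburing is installed; this program is illustrative and not part of the commit:

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	sqe = io_uring_get_sqe(&ring);
	if (sqe) {
		io_uring_prep_nop(sqe);
		io_uring_submit(&ring);
	}

	/*
	 * io_uring_cq_ready() reads the shared cq.tail and cq.head, i.e. the
	 * same user-visible values the kernel now uses in
	 * __io_cqring_events_user(), rather than a kernel-internal cached tail.
	 */
	printf("CQEs ready: %u\n", io_uring_cq_ready(&ring));

	io_uring_queue_exit(&ring);
	return 0;
}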
@@ -346,6 +346,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	tmp = READ_ONCE(sqe->off);
 	if (tmp > USHRT_MAX)
 		return -E2BIG;
+	if (tmp + p->nbufs >= USHRT_MAX)
+		return -EINVAL;
 	p->bid = tmp;
 	return 0;
 }
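The new check rejects a provide-buffers request whose starting buffer ID plus buffer count would wrap the 16-bit ID space. A hedged userspace sketch of the rejected case, assuming liburing and a kernel carrying this fix (illustrative only, not from the commit):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	static char bufs[1000][64];	/* 1000 buffers of 64 bytes each */

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	/* bid 65000 + 1000 buffers would wrap the u16 buffer ID space */
	io_uring_prep_provide_buffers(io_uring_get_sqe(&ring), bufs, 64, 1000,
				      /* bgid */ 1, /* bid */ 65000);
	io_uring_submit(&ring);

	if (!io_uring_wait_cqe(&ring, &cqe)) {
		/* expected: -EINVAL on kernels with this fix */
		printf("provide_buffers: %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}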
@@ -116,6 +116,8 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
 	struct io_hash_table *table = &req->ctx->cancel_table_locked;
 	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
 
+	lockdep_assert_held(&req->ctx->uring_lock);
+
 	hlist_add_head(&req->hash_node, &table->hbs[index].list);
 }
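For readers unfamiliar with the annotation: lockdep_assert_held() documents a function's locking contract and, on lockdep-enabled kernels, warns at runtime if the caller does not hold the lock. A small kernel-style sketch of the pattern, with hypothetical names (not from this commit):

#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>

struct item_cache {
	struct mutex lock;
	struct list_head items;
};

/* caller must hold cache->lock; the assert compiles away without lockdep */
static void item_cache_add_locked(struct item_cache *cache,
				  struct list_head *node)
{
	lockdep_assert_held(&cache->lock);
	list_add(node, &cache->items);
}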
@@ -394,7 +396,8 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	return 1;
 }
 
-static void io_poll_double_prepare(struct io_kiocb *req)
+/* fails only when polling is already completing by the first entry */
+static bool io_poll_double_prepare(struct io_kiocb *req)
 {
 	struct wait_queue_head *head;
 	struct io_poll *poll = io_poll_get_single(req);
@@ -403,20 +406,20 @@ static void io_poll_double_prepare(struct io_kiocb *req)
 	rcu_read_lock();
 	head = smp_load_acquire(&poll->head);
 	/*
-	 * poll arm may not hold ownership and so race with
-	 * io_poll_wake() by modifying req->flags. There is only one
-	 * poll entry queued, serialise with it by taking its head lock.
+	 * poll arm might not hold ownership and so race for req->flags with
+	 * io_poll_wake(). There is only one poll entry queued, serialise with
+	 * it by taking its head lock. As we're still arming the tw hanlder
+	 * is not going to be run, so there are no races with it.
 	 */
-	if (head)
+	if (head) {
 		spin_lock_irq(&head->lock);
-	req->flags |= REQ_F_DOUBLE_POLL;
-	if (req->opcode == IORING_OP_POLL_ADD)
-		req->flags |= REQ_F_ASYNC_DATA;
-	if (head)
+		req->flags |= REQ_F_DOUBLE_POLL;
+		if (req->opcode == IORING_OP_POLL_ADD)
+			req->flags |= REQ_F_ASYNC_DATA;
 		spin_unlock_irq(&head->lock);
+	}
 	rcu_read_unlock();
+	return !!head;
 }
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
...@@ -454,7 +457,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt, ...@@ -454,7 +457,11 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
/* mark as double wq entry */ /* mark as double wq entry */
wqe_private |= IO_WQE_F_DOUBLE; wqe_private |= IO_WQE_F_DOUBLE;
io_init_poll_iocb(poll, first->events, first->wait.func); io_init_poll_iocb(poll, first->events, first->wait.func);
io_poll_double_prepare(req); if (!io_poll_double_prepare(req)) {
/* the request is completing, just back off */
kfree(poll);
return;
}
*poll_ptr = poll; *poll_ptr = poll;
} else { } else {
/* fine to modify, there is no poll queued to race with us */ /* fine to modify, there is no poll queued to race with us */
......
@@ -29,7 +29,7 @@ if [[ "$#" -eq "0" ]]; then
 	for IP in "${IPs[@]}"; do
 		for mode in $(seq 1 3); do
 			$0 "$IP" udp -m "$mode" -t 1 -n 32
-			$0 "$IP" tcp -m "$mode" -t 1 -n 32
+			$0 "$IP" tcp -m "$mode" -t 1 -n 1
 		done
 	done