Commit 598f2404 authored by Linus Torvalds

Merge tag 'io_uring-5.19-2022-06-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "A few fixes that should go into the 5.19 release. All are fixing
  issues that either happened in this release, or going to stable.

  In detail:

   - A small series of fixlets for the poll handling, all destined for
     stable (Pavel)

   - Fix a merge error from myself that caused a potential -EINVAL for
     the recv/recvmsg flag setting (me)

   - Fix a kbuf recycling issue for partial IO (me)

   - Use the original request for the inflight tracking (me)

   - Fix an issue introduced this merge window with trace points using a
     custom decoder function, which won't work for perf (Dylan)"

* tag 'io_uring-5.19-2022-06-24' of git://git.kernel.dk/linux-block:
  io_uring: use original request task for inflight tracking
  io_uring: move io_uring_get_opcode out of TP_printk
  io_uring: fix double poll leak on repolling
  io_uring: fix wrong arm_poll error handling
  io_uring: fail links when poll fails
  io_uring: fix req->apoll_events
  io_uring: fix merge error in checking send/recv addr2 flags
  io_uring: mark reissue requests with REQ_F_PARTIAL_IO
parents 9d882352 386e4fb6
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1975,7 +1975,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req)
 {
 	if (!(req->flags & REQ_F_INFLIGHT)) {
 		req->flags |= REQ_F_INFLIGHT;
-		atomic_inc(&current->io_uring->inflight_tracked);
+		atomic_inc(&req->task->io_uring->inflight_tracked);
 	}
 }
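Why this one-word change matters: completion can run from an io-wq worker or from task_work on a task other than the one that submitted the request, so accounting inflight against "current" could bump the wrong task's counter. A minimal userspace sketch of the same ownership rule (plain C11; all names here are hypothetical, this is not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    struct task { atomic_int inflight; };

    struct request {
    	struct task *owner;	/* plays the role of req->task, set at submit */
    };

    static void track_inflight(struct request *req)
    {
    	/* charge the task that owns the request, never the current thread */
    	atomic_fetch_add(&req->owner->inflight, 1);
    }

    int main(void)
    {
    	struct task t = { .inflight = 0 };
    	struct request r = { .owner = &t };

    	track_inflight(&r);
    	printf("owner inflight = %d\n", atomic_load(&t.inflight));
    	return 0;
    }
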
@@ -3437,7 +3437,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 	if (unlikely(res != req->cqe.res)) {
 		if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
 		    io_rw_should_reissue(req)) {
-			req->flags |= REQ_F_REISSUE;
+			req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
 			return true;
 		}
 		req_set_fail(req);
@@ -3487,7 +3487,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 		kiocb_end_write(req);
 	if (unlikely(res != req->cqe.res)) {
 		if (res == -EAGAIN && io_rw_should_reissue(req)) {
-			req->flags |= REQ_F_REISSUE;
+			req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
 			return;
 		}
 		req->cqe.res = res;
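Both reissue paths now also set REQ_F_PARTIAL_IO, which feeds the kbuf-recycling fix in this series: once some bytes have landed in a selected buffer, that buffer must not be handed back to the provided-buffer pool when the request is retried. A hedged sketch of that guard (struct and helper names are illustrative, not the kernel's):

    #include <stdbool.h>

    #define REQ_F_BUFFER_SELECTED	(1U << 0)
    #define REQ_F_PARTIAL_IO	(1U << 1)

    struct request { unsigned int flags; };

    static bool may_recycle_buffer(const struct request *req)
    {
    	if (!(req->flags & REQ_F_BUFFER_SELECTED))
    		return false;	/* no selected buffer to recycle */
    	if (req->flags & REQ_F_PARTIAL_IO)
    		return false;	/* data already landed: keep the buffer */
    	return true;
    }
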
@@ -6077,8 +6077,6 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->file_index))
 		return -EINVAL;
-	if (unlikely(sqe->addr2 || sqe->file_index))
-		return -EINVAL;
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
@@ -6315,8 +6313,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->file_index))
 		return -EINVAL;
-	if (unlikely(sqe->addr2 || sqe->file_index))
-		return -EINVAL;
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
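The deleted lines were a merge leftover: the file_index validation already runs one line above, and the stray copy additionally rejected a nonzero addr2, which in this kernel carries the new send/recv flag bits, so legitimately setting those flags got -EINVAL. For reference, a minimal liburing receive exercising this prep path from userspace (assumes liburing is installed; build with cc -o recv_demo recv_demo.c -luring; no io_uring-specific recv flags are set here):

    #include <liburing.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
    	struct io_uring ring;
    	struct io_uring_sqe *sqe;
    	struct io_uring_cqe *cqe;
    	int sv[2];
    	char buf[16] = { 0 };

    	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
    		return 1;
    	if (io_uring_queue_init(8, &ring, 0))
    		return 1;

    	send(sv[1], "hi", 2, 0);	/* data for the recv below */

    	sqe = io_uring_get_sqe(&ring);
    	io_uring_prep_recv(sqe, sv[0], buf, sizeof(buf), 0);
    	io_uring_submit(&ring);

    	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
    		printf("recv res=%d data=%.*s\n", cqe->res,
    		       cqe->res > 0 ? cqe->res : 0, buf);
    		io_uring_cqe_seen(&ring, cqe);
    	}
    	io_uring_queue_exit(&ring);
    	return 0;
    }
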
@@ -6954,7 +6950,8 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 		io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
+static void __io_poll_execute(struct io_kiocb *req, int mask,
+			      __poll_t __maybe_unused events)
 {
 	req->cqe.res = mask;
 
 	/*
@@ -6963,7 +6960,6 @@ static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
 	 * CPU. We want to avoid pulling in req->apoll->events for that
 	 * case.
 	 */
-	req->apoll_events = events;
 	if (req->opcode == IORING_OP_POLL_ADD)
 		req->io_task_work.func = io_poll_task_func;
 	else
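With the racy store removed, events is no longer referenced in the function body, but the parameter is kept so callers don't change; __maybe_unused documents that and silences unused-parameter warnings. A standalone illustration of what the kernel macro expands to (the function here is hypothetical):

    /* the kernel defines __maybe_unused as this GCC/Clang attribute */
    #define __maybe_unused __attribute__((__unused__))

    static int poll_execute(int mask, unsigned int __maybe_unused events)
    {
    	return mask;	/* events is intentionally unreferenced */
    }

    int main(void) { return poll_execute(1, 0) == 1 ? 0 : 1; }
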
@@ -7114,6 +7110,8 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 	io_init_poll_iocb(poll, mask, io_poll_wake);
 	poll->file = req->file;
 
+	req->apoll_events = poll->events;
+
 	ipt->pt._key = mask;
 	ipt->req = req;
 	ipt->error = 0;
@@ -7144,8 +7142,11 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 
 	if (mask) {
 		/* can't multishot if failed, just queue the event we've got */
-		if (unlikely(ipt->error || !ipt->nr_entries))
+		if (unlikely(ipt->error || !ipt->nr_entries)) {
 			poll->events |= EPOLLONESHOT;
+			req->apoll_events |= EPOLLONESHOT;
+			ipt->error = 0;
+		}
 		__io_poll_execute(req, mask, poll->events);
 		return 0;
 	}
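The new braces turn a one-line fallback into a three-step one: when multishot arming partially failed but a ready mask already exists, both views of the event mask (poll->events and req->apoll_events, now set at arm time by the hunk above) are downgraded to oneshot, and the error is cleared so the event that was gathered is still delivered instead of being turned into a failure. A hedged sketch of the pattern (the struct is illustrative; EPOLLONESHOT is the real constant):

    #include <sys/epoll.h>
    #include <stdbool.h>

    struct poll_state {
    	unsigned int events;	/* stands in for poll->events */
    	unsigned int tracked;	/* stands in for req->apoll_events */
    	int error;
    };

    static void degrade_to_oneshot(struct poll_state *ps, bool arm_failed)
    {
    	if (arm_failed) {
    		ps->events |= EPOLLONESHOT;
    		ps->tracked |= EPOLLONESHOT;	/* keep both views in sync */
    		ps->error = 0;			/* deliver the event we got */
    	}
    }
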
@@ -7207,6 +7208,7 @@ static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 		mask |= EPOLLEXCLUSIVE;
 	if (req->flags & REQ_F_POLLED) {
 		apoll = req->apoll;
+		kfree(apoll->double_poll);
 	} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
 		   !list_empty(&ctx->apoll_cache)) {
 		apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
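This is the double-poll leak fix: on a repoll, REQ_F_POLLED is already set and the cached async_poll is reused, but a second-level double_poll entry allocated by the previous arming would be overwritten and leaked without this kfree(). A hedged userspace sketch of the ownership rule (struct names mirror the kernel's shape but are illustrative):

    #include <stdlib.h>

    struct poll_entry { int dummy; };

    struct async_poll {
    	struct poll_entry *double_poll;	/* second poll entry, may be NULL */
    };

    static void reuse_apoll(struct async_poll *apoll)
    {
    	free(apoll->double_poll);	/* drop leftover from the last arming */
    	apoll->double_poll = NULL;	/* re-arm starts from a clean slate */
    }
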
@@ -7392,7 +7394,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	io_req_set_refcount(req);
-	req->apoll_events = poll->events = io_poll_parse_events(sqe, flags);
+	poll->events = io_poll_parse_events(sqe, flags);
 	return 0;
 }
@@ -7405,6 +7407,8 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 	ipt.pt._qproc = io_poll_queue_proc;
 
 	ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
+	if (!ret && ipt.error)
+		req_set_fail(req);
 	ret = ret ?: ipt.error;
 	if (ret)
 		__io_req_complete(req, issue_flags, ret, 0);
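Paired with the fail-links change, a poll that arms with an internal error now marks the request failed, so anything linked behind it is cancelled rather than run on stale state. A minimal liburing sketch of a poll linked to a read (standard liburing API; the failure injection itself isn't shown; build with cc -o link_demo link_demo.c -luring):

    #include <liburing.h>
    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    	struct io_uring ring;
    	struct io_uring_sqe *sqe;
    	struct io_uring_cqe *cqe;
    	int fds[2];
    	char buf[8];

    	if (pipe(fds) || io_uring_queue_init(8, &ring, 0))
    		return 1;

    	sqe = io_uring_get_sqe(&ring);
    	io_uring_prep_poll_add(sqe, fds[0], POLLIN);
    	sqe->flags |= IOSQE_IO_LINK;	/* read below depends on the poll */

    	sqe = io_uring_get_sqe(&ring);
    	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);

    	write(fds[1], "x", 1);		/* make the poll succeed */
    	io_uring_submit(&ring);

    	for (int i = 0; i < 2; i++) {
    		if (io_uring_wait_cqe(&ring, &cqe))
    			break;
    		/* if the poll had failed, the read would show -ECANCELED */
    		printf("cqe %d: res=%d\n", i, cqe->res);
    		io_uring_cqe_seen(&ring, cqe);
    	}
    	io_uring_queue_exit(&ring);
    	return 0;
    }
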
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -158,6 +158,8 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__field(	unsigned int,		flags		)
 		__field(	struct io_wq_work *,	work		)
 		__field(	int,			rw		)
+
+		__string(	op_str, io_uring_get_opcode(opcode)	)
 	),
 
 	TP_fast_assign(
@@ -168,11 +170,13 @@ TRACE_EVENT(io_uring_queue_async_work,
 		__entry->opcode		= opcode;
 		__entry->work		= work;
 		__entry->rw		= rw;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
 		__entry->ctx, __entry->req, __entry->user_data,
-		io_uring_get_opcode(__entry->opcode),
+		__get_str(op_str),
 		__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
 );
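The same transformation repeats in every hunk of this header: io_uring_get_opcode() used to be called from TP_printk(), but TP_printk() runs at readout time, and perf decodes events in a context where that kernel helper can't be called. __string()/__assign_str() resolve the opcode name once at trace time, inside TP_fast_assign(), and store it in the event record, so TP_printk() only needs __get_str(). A condensed sketch of the idiom (event and helper names are hypothetical, this is the 5.19-era two-argument __assign_str() form, and it would live inside a normal trace header with the usual boilerplate):

    TRACE_EVENT(myapp_op,
    	TP_PROTO(u8 opcode),
    	TP_ARGS(opcode),

    	TP_STRUCT__entry(
    		__field(	u8,	opcode	)
    		__string(	op_str, my_get_name(opcode) )
    	),

    	TP_fast_assign(
    		__entry->opcode = opcode;
    		/* resolved NOW, in the kernel, and stored in the event */
    		__assign_str(op_str, my_get_name(opcode));
    	),

    	/* readout side only dereferences stored data: safe for perf */
    	TP_printk("opcode %s", __get_str(op_str))
    );
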
@@ -198,6 +202,8 @@ TRACE_EVENT(io_uring_defer,
 		__field(	void *,			req	)
 		__field(	unsigned long long,	data	)
 		__field(	u8,			opcode	)
+
+		__string(	op_str, io_uring_get_opcode(opcode) )
 	),
 
 	TP_fast_assign(
@@ -205,11 +211,13 @@ TRACE_EVENT(io_uring_defer,
 		__entry->req	= req;
 		__entry->data	= user_data;
 		__entry->opcode	= opcode;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
 		__entry->ctx, __entry->req, __entry->data,
-		io_uring_get_opcode(__entry->opcode))
+		__get_str(op_str))
 );
@@ -298,6 +306,8 @@ TRACE_EVENT(io_uring_fail_link,
 		__field(	unsigned long long,	user_data	)
 		__field(	u8,			opcode		)
 		__field(	void *,			link		)
+
+		__string(	op_str, io_uring_get_opcode(opcode)	)
 	),
 
 	TP_fast_assign(
@@ -306,11 +316,13 @@ TRACE_EVENT(io_uring_fail_link,
 		__entry->user_data	= user_data;
 		__entry->opcode		= opcode;
 		__entry->link		= link;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
 		__entry->ctx, __entry->req, __entry->user_data,
-		io_uring_get_opcode(__entry->opcode), __entry->link)
+		__get_str(op_str), __entry->link)
 );
@@ -390,6 +402,8 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__field(	u32,	flags		)
 		__field(	bool,	force_nonblock	)
 		__field(	bool,	sq_thread	)
+
+		__string(	op_str, io_uring_get_opcode(opcode) )
 	),
 
 	TP_fast_assign(
@@ -400,11 +414,13 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__entry->flags		= flags;
 		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= sq_thread;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
 		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
-		  __entry->user_data, io_uring_get_opcode(__entry->opcode),
+		  __entry->user_data, __get_str(op_str),
 		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
@@ -435,6 +451,8 @@ TRACE_EVENT(io_uring_poll_arm,
 		__field(	u8,	opcode	)
 		__field(	int,	mask	)
 		__field(	int,	events	)
+
+		__string(	op_str, io_uring_get_opcode(opcode) )
 	),
 
 	TP_fast_assign(
@@ -444,11 +462,13 @@ TRACE_EVENT(io_uring_poll_arm,
 		__entry->opcode		= opcode;
 		__entry->mask		= mask;
 		__entry->events		= events;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
 		__entry->ctx, __entry->req, __entry->user_data,
-		io_uring_get_opcode(__entry->opcode),
+		__get_str(op_str),
 		__entry->mask, __entry->events)
 );
@@ -474,6 +494,8 @@ TRACE_EVENT(io_uring_task_add,
 		__field(	unsigned long long,	user_data	)
 		__field(	u8,			opcode		)
 		__field(	int,			mask		)
+
+		__string(	op_str, io_uring_get_opcode(opcode) )
 	),
 
 	TP_fast_assign(
@@ -482,11 +504,13 @@ TRACE_EVENT(io_uring_task_add,
 		__entry->user_data	= user_data;
 		__entry->opcode		= opcode;
 		__entry->mask		= mask;
+
+		__assign_str(op_str, io_uring_get_opcode(opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
 		__entry->ctx, __entry->req, __entry->user_data,
-		io_uring_get_opcode(__entry->opcode),
+		__get_str(op_str),
 		__entry->mask)
 );
@@ -523,6 +547,8 @@ TRACE_EVENT(io_uring_req_failed,
 		__field(	u64,	pad1	)
 		__field(	u64,	addr3	)
 		__field(	int,	error	)
+
+		__string(	op_str, io_uring_get_opcode(sqe->opcode) )
 	),
 
 	TP_fast_assign(
@@ -542,6 +568,8 @@ TRACE_EVENT(io_uring_req_failed,
 		__entry->pad1		= sqe->__pad2[0];
 		__entry->addr3		= sqe->addr3;
 		__entry->error		= error;
+
+		__assign_str(op_str, io_uring_get_opcode(sqe->opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, "
@@ -550,7 +578,7 @@ TRACE_EVENT(io_uring_req_failed,
 		  "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
 		  "error=%d",
 		  __entry->ctx, __entry->req, __entry->user_data,
-		  io_uring_get_opcode(__entry->opcode),
+		  __get_str(op_str),
 		  __entry->flags, __entry->ioprio,
 		  (unsigned long long)__entry->off,
 		  (unsigned long long)__entry->addr, __entry->len,