Commit 65a6543d, authored by Xiaoguang Wang, committed by Jens Axboe

io_uring: fix io_kiocb.flags modification race in IOPOLL mode

While testing io_uring on arm, we found that io_sq_thread() sometimes keeps
polling for io requests even though there are no inflight io requests in
the block layer. After some investigation, we found a possible race on
io_kiocb.flags; see the racing code below:
  1) in the end of io_write() or io_read()
    req->flags &= ~REQ_F_NEED_CLEANUP;
    kfree(iovec);
    return ret;

  2) in io_complete_rw_iopoll()
    if (res != -EAGAIN)
        req->flags |= REQ_F_IOPOLL_COMPLETED;

In IOPOLL mode, io requests may still be completed by interrupt, so the
code above is not safe: it makes concurrent modifications to req->flags
that are neither protected by a lock nor atomic. I also disassembled
io_complete_rw_iopoll() on arm:
   req->flags |= REQ_F_IOPOLL_COMPLETED;
   0xffff000008387b18 <+76>:    ldr     w0, [x19,#104]
   0xffff000008387b1c <+80>:    orr     w0, w0, #0x1000
   0xffff000008387b20 <+84>:    str     w0, [x19,#104]

The "req->flags |= REQ_F_IOPOLL_COMPLETED;" statement compiles to a separate
load, modify and store (three instructions), which is obviously not atomic.

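To fix this issue, add a new iopoll_completed field to io_kiocb to indicate
whether the io request has completed. The completion handler now sets this
field with WRITE_ONCE() instead of doing a read-modify-write on req->flags,
and the polling side checks it with READ_ONCE().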
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e697deed
...@@ -529,7 +529,6 @@ enum { ...@@ -529,7 +529,6 @@ enum {
REQ_F_INFLIGHT_BIT, REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT, REQ_F_CUR_POS_BIT,
REQ_F_NOWAIT_BIT, REQ_F_NOWAIT_BIT,
REQ_F_IOPOLL_COMPLETED_BIT,
REQ_F_LINK_TIMEOUT_BIT, REQ_F_LINK_TIMEOUT_BIT,
REQ_F_TIMEOUT_BIT, REQ_F_TIMEOUT_BIT,
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
...@@ -574,8 +573,6 @@ enum { ...@@ -574,8 +573,6 @@ enum {
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
/* must not punt to workers */ /* must not punt to workers */
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
/* polled IO has completed */
REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
/* has linked timeout */ /* has linked timeout */
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* timeout request */ /* timeout request */
...@@ -640,6 +637,8 @@ struct io_kiocb { ...@@ -640,6 +637,8 @@ struct io_kiocb {
struct io_async_ctx *io; struct io_async_ctx *io;
int cflags; int cflags;
u8 opcode; u8 opcode;
/* polled IO has completed */
u8 iopoll_completed;
u16 buf_index; u16 buf_index;
...@@ -1798,7 +1797,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, ...@@ -1798,7 +1797,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
* If we find a request that requires polling, break out * If we find a request that requires polling, break out
* and complete those lists first, if we have entries there. * and complete those lists first, if we have entries there.
*/ */
if (req->flags & REQ_F_IOPOLL_COMPLETED) { if (READ_ONCE(req->iopoll_completed)) {
list_move_tail(&req->list, &done); list_move_tail(&req->list, &done);
continue; continue;
} }
...@@ -1979,7 +1978,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) ...@@ -1979,7 +1978,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
req_set_fail_links(req); req_set_fail_links(req);
req->result = res; req->result = res;
if (res != -EAGAIN) if (res != -EAGAIN)
req->flags |= REQ_F_IOPOLL_COMPLETED; WRITE_ONCE(req->iopoll_completed, 1);
} }
/* /*
...@@ -2012,7 +2011,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) ...@@ -2012,7 +2011,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* For fast devices, IO may have already completed. If it has, add * For fast devices, IO may have already completed. If it has, add
* it to the front so we find it first. * it to the front so we find it first.
*/ */
if (req->flags & REQ_F_IOPOLL_COMPLETED) if (READ_ONCE(req->iopoll_completed))
list_add(&req->list, &ctx->poll_list); list_add(&req->list, &ctx->poll_list);
else else
list_add_tail(&req->list, &ctx->poll_list); list_add_tail(&req->list, &ctx->poll_list);
...@@ -2140,6 +2139,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, ...@@ -2140,6 +2139,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll; kiocb->ki_complete = io_complete_rw_iopoll;
req->result = 0; req->result = 0;
req->iopoll_completed = 0;
} else { } else {
if (kiocb->ki_flags & IOCB_HIPRI) if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL; return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment