Commit 01d7a356 authored by Jens Axboe

aio: prevent potential eventfd recursion on poll

If we have nested or circular eventfd wakeups, then we can deadlock if
we run them inline from our poll waitqueue wakeup handler. It's also
possible to have very long chains of notifications, to the extent where
we could risk blowing the stack.

Check the eventfd recursion count before calling eventfd_signal(). If
it's non-zero, then punt the signaling to async context. This is always
safe, as it takes us out-of-line in terms of stack and locking context.

Cc: stable@vger.kernel.org # 4.19+
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 3e577dcd
...@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, ...@@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
return 0; return 0;
} }
/*
 * Work item handler that drops a reference on a poll iocb.
 *
 * @work is the work_struct embedded in a struct poll_iocb, which in turn is
 * the ->poll member of a struct aio_kiocb. Walk back up to the enclosing
 * aio_kiocb and hand it to iocb_put().
 *
 * aio_poll_wake() installs this handler with INIT_WORK() and schedules it
 * when the iocb has an eventfd and eventfd_signal_count() is non-zero, so
 * that iocb_put() is called from the work item rather than inline from the
 * wakeup handler.
 */
static void aio_poll_put_work(struct work_struct *work)
{
	struct poll_iocb *poll_req = container_of(work, struct poll_iocb, work);

	iocb_put(container_of(poll_req, struct aio_kiocb, poll));
}
static void aio_poll_complete_work(struct work_struct *work) static void aio_poll_complete_work(struct work_struct *work)
{ {
struct poll_iocb *req = container_of(work, struct poll_iocb, work); struct poll_iocb *req = container_of(work, struct poll_iocb, work);
...@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ...@@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del_init(&req->wait.entry); list_del_init(&req->wait.entry);
if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
struct kioctx *ctx = iocb->ki_ctx;
/* /*
* Try to complete the iocb inline if we can. Use * Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse) * irqsave/irqrestore because not all filesystems (e.g. fuse)
...@@ -1683,7 +1693,13 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ...@@ -1683,7 +1693,13 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del(&iocb->ki_list); list_del(&iocb->ki_list);
iocb->ki_res.res = mangle_poll(mask); iocb->ki_res.res = mangle_poll(mask);
req->done = true; req->done = true;
spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); if (iocb->ki_eventfd && eventfd_signal_count()) {
iocb = NULL;
INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
}
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
if (iocb)
iocb_put(iocb); iocb_put(iocb);
} else { } else {
schedule_work(&req->work); schedule_work(&req->work);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment