Commit 895aa7b1 authored by Jens Axboe's avatar Jens Axboe

Merge branch 'async-buffered.8' into for-5.9/io_uring

Pull in async buffered reads branch.

* async-buffered.8:
  io_uring: support true async buffered reads, if file provides it
  mm: add kiocb_wait_page_queue_init() helper
  btrfs: flag files as supporting buffered async reads
  xfs: flag files as supporting buffered async reads
  block: flag block devices as supporting IOCB_WAITQ
  fs: add FMODE_BUF_RASYNC
  mm: support async buffered reads in generic_file_buffered_read()
  mm: add support for async page locking
  mm: abstract out wake_page_match() from wake_page_function()
  mm: allow read-ahead with IOCB_NOWAIT set
  io_uring: re-issue block requests that failed because of resources
  io_uring: catch -EIO from buffered issue request failure
  io_uring: always plug for any number of IOs
  block: provide plug based way of signaling forced no-wait semantics
parents 2e0464d4 bcf5a063
......@@ -958,6 +958,7 @@ generic_make_request_checks(struct bio *bio)
struct request_queue *q;
int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
struct blk_plug *plug;
char b[BDEVNAME_SIZE];
might_sleep();
......@@ -971,6 +972,10 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
plug = blk_mq_plug(q, bio);
if (plug && plug->nowait)
bio->bi_opf |= REQ_NOWAIT;
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue is not a request based queue.
......@@ -1800,6 +1805,7 @@ void blk_start_plug(struct blk_plug *plug)
INIT_LIST_HEAD(&plug->cb_list);
plug->rq_count = 0;
plug->multiple_queues = false;
plug->nowait = false;
/*
* Store ordering should not be needed here, since a potential
......
......@@ -1851,7 +1851,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
*/
filp->f_flags |= O_LARGEFILE;
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
if (filp->f_flags & O_NDELAY)
filp->f_mode |= FMODE_NDELAY;
......
......@@ -3472,7 +3472,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return generic_file_open(inode, filp);
}
......
This diff is collapsed.
......@@ -1080,7 +1080,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT;
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
return 0;
}
......
......@@ -1189,6 +1189,7 @@ struct blk_plug {
struct list_head cb_list; /* md requires an unplug callback */
unsigned short rq_count;
bool multiple_queues;
bool nowait;
};
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
......
......@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
/* File supports async buffered reads */
#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
......@@ -315,6 +318,8 @@ enum rw_hint {
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ (1 << 8)
struct kiocb {
struct file *ki_filp;
......@@ -328,7 +333,10 @@ struct kiocb {
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
unsigned int ki_cookie; /* for ->iopoll */
union {
unsigned int ki_cookie; /* for ->iopoll */
struct wait_page_queue *ki_waitq; /* for async buffered IO */
};
randomized_struct_fields_end
};
......
......@@ -496,8 +496,67 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
return pgoff;
}
/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
struct wait_page_key {
struct page *page;
int bit_nr;
int page_match;
};
struct wait_page_queue {
struct page *page;
int bit_nr;
wait_queue_entry_t wait;
};
static inline int wake_page_match(struct wait_page_queue *wait_page,
struct wait_page_key *key)
{
if (wait_page->page != key->page)
return 0;
key->page_match = 1;
if (wait_page->bit_nr != key->bit_nr)
return 0;
/*
* Stop walking if it's locked.
* Is this safe if put_and_wait_on_page_locked() is in use?
* Yes: the waker must hold a reference to this page, and if PG_locked
* has now already been set by another task, that task must also hold
* a reference to the *same usage* of this page; so there is no need
* to walk on to wake even the put_and_wait_on_page_locked() callers.
*/
if (test_bit(key->bit_nr, &key->page->flags))
return -1;
return 1;
}
static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
struct wait_page_queue *wait,
wait_queue_func_t func,
void *data)
{
/* Can't support async wakeup with polled IO */
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
wait->wait.func = func;
wait->wait.private = data;
wait->wait.flags = 0;
INIT_LIST_HEAD(&wait->wait.entry);
kiocb->ki_flags |= IOCB_WAITQ;
kiocb->ki_waitq = wait;
return 0;
}
return -EOPNOTSUPP;
}
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags);
extern void unlock_page(struct page *page);
......@@ -534,6 +593,22 @@ static inline int lock_page_killable(struct page *page)
return 0;
}
/*
* lock_page_async - Lock the page, unless this would block. If the page
* is already locked, then queue a callback when the page becomes unlocked.
* This callback can then retry the operation.
*
* Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
* was already locked and the callback defined in 'wait' was queued.
*/
static inline int lock_page_async(struct page *page,
struct wait_page_queue *wait)
{
if (!trylock_page(page))
return __lock_page_async(page, wait);
return 0;
}
/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
......
......@@ -987,43 +987,16 @@ void __init pagecache_init(void)
page_writeback_init();
}
/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
struct wait_page_key {
struct page *page;
int bit_nr;
int page_match;
};
struct wait_page_queue {
struct page *page;
int bit_nr;
wait_queue_entry_t wait;
};
static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
struct wait_page_key *key = arg;
struct wait_page_queue *wait_page
= container_of(wait, struct wait_page_queue, wait);
int ret;
if (wait_page->page != key->page)
return 0;
key->page_match = 1;
if (wait_page->bit_nr != key->bit_nr)
return 0;
/*
* Stop walking if it's locked.
* Is this safe if put_and_wait_on_page_locked() is in use?
* Yes: the waker must hold a reference to this page, and if PG_locked
* has now already been set by another task, that task must also hold
* a reference to the *same usage* of this page; so there is no need
* to walk on to wake even the put_and_wait_on_page_locked() callers.
*/
if (test_bit(key->bit_nr, &key->page->flags))
return -1;
ret = wake_page_match(wait_page, key);
if (ret != 1)
return ret;
return autoremove_wake_function(wait, mode, sync, key);
}
......@@ -1207,6 +1180,44 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
}
EXPORT_SYMBOL(wait_on_page_bit_killable);
static int __wait_on_page_locked_async(struct page *page,
struct wait_page_queue *wait, bool set)
{
struct wait_queue_head *q = page_waitqueue(page);
int ret = 0;
wait->page = page;
wait->bit_nr = PG_locked;
spin_lock_irq(&q->lock);
__add_wait_queue_entry_tail(q, &wait->wait);
SetPageWaiters(page);
if (set)
ret = !trylock_page(page);
else
ret = PageLocked(page);
/*
* If we were succesful now, we know we're still on the
* waitqueue as we're still under the lock. This means it's
* safe to remove and return success, we know the callback
* isn't going to trigger.
*/
if (!ret)
__remove_wait_queue(q, &wait->wait);
else
ret = -EIOCBQUEUED;
spin_unlock_irq(&q->lock);
return ret;
}
static int wait_on_page_locked_async(struct page *page,
struct wait_page_queue *wait)
{
if (!PageLocked(page))
return 0;
return __wait_on_page_locked_async(compound_head(page), wait, false);
}
/**
* put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
* @page: The page to wait for.
......@@ -1369,6 +1380,11 @@ int __lock_page_killable(struct page *__page)
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
int __lock_page_async(struct page *page, struct wait_page_queue *wait)
{
return __wait_on_page_locked_async(page, wait, true);
}
/*
* Return values:
* 1 - page is locked; mmap_lock is still held.
......@@ -2028,8 +2044,6 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
page = find_get_page(mapping, index);
if (!page) {
if (iocb->ki_flags & IOCB_NOWAIT)
goto would_block;
page_cache_sync_readahead(mapping,
ra, filp,
index, last_index - index);
......@@ -2043,17 +2057,25 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
index, last_index - index);
}
if (!PageUptodate(page)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
put_page(page);
goto would_block;
}
/*
* See comment in do_read_cache_page on why
* wait_on_page_locked is used to avoid unnecessarily
* serialisations and why it's safe.
*/
error = wait_on_page_locked_killable(page);
if (iocb->ki_flags & IOCB_WAITQ) {
if (written) {
put_page(page);
goto out;
}
error = wait_on_page_locked_async(page,
iocb->ki_waitq);
} else {
if (iocb->ki_flags & IOCB_NOWAIT) {
put_page(page);
goto would_block;
}
error = wait_on_page_locked_killable(page);
}
if (unlikely(error))
goto readpage_error;
if (PageUptodate(page))
......@@ -2141,7 +2163,10 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
page_not_up_to_date:
/* Get exclusive access to the page ... */
error = lock_page_killable(page);
if (iocb->ki_flags & IOCB_WAITQ)
error = lock_page_async(page, iocb->ki_waitq);
else
error = lock_page_killable(page);
if (unlikely(error))
goto readpage_error;
......@@ -2160,6 +2185,11 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
}
readpage:
if (iocb->ki_flags & IOCB_NOWAIT) {
unlock_page(page);
put_page(page);
goto would_block;
}
/*
* A previous I/O error may have been due to temporary
* failures, eg. multipath errors.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment