Commit 723ef24b authored by Kent Overstreet, committed by Linus Torvalds

mm/filemap.c: break generic_file_buffered_read up into multiple functions

Patch series "generic_file_buffered_read() improvements", v2.

generic_file_buffered_read() has turned into a real monstrosity to work
with.  This series breaks it up into smaller, more manageable functions, and
it's a major performance improvement for both small random and large
sequential reads.  On my test box, 4k buffered random reads go from ~150k to
~250k iops, and the improvements to big sequential reads are even bigger.

This incorporates the fix for IOCB_WAITQ handling that Jens just posted, and
also factors out lock_page_for_iocb() to improve handling of the various iocb
flags.
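For orientation, the schematic below shows the kind of open-coded flag handling the helper replaces at its call sites; the exact hunks are in the diff further down, and the "before" branch is a simplified composite (the old call sites differed slightly, e.g. some only chose between the async and killable lock).

	/* Before (schematic): each call site picked the lock variant itself. */
	if (iocb->ki_flags & IOCB_WAITQ)
		error = lock_page_async(page, iocb->ki_waitq);
	else if (iocb->ki_flags & IOCB_NOWAIT)
		error = trylock_page(page) ? 0 : -EAGAIN;
	else
		error = lock_page_killable(page);

	/* After: one helper centralizes the IOCB_WAITQ/IOCB_NOWAIT/blocking cases. */
	error = lock_page_for_iocb(iocb, page);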

This patch (of 2):

This is prep work for changing generic_file_buffered_read() to use
find_get_pages_contig() to batch up all the pagecache lookups.
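As a rough illustration of where that is heading (not part of this patch; the pages[] array, PAGEVEC_SIZE sizing and the loop here are purely hypothetical scaffolding for the sketch), batching the lookups would look something like this:

	struct page *pages[PAGEVEC_SIZE];
	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
	unsigned int i, nr;

	/* One pagecache walk instead of one find_get_page() per page. */
	nr = find_get_pages_contig(mapping, index, ARRAY_SIZE(pages), pages);

	for (i = 0; i < nr; i++) {
		/* copy out of pages[i], as generic_file_buffered_read_page_ok() does */
		put_page(pages[i]);
	}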

This patch should be functionally identical to the existing code and changes
as little of the flow control as possible.  More refactoring could be done;
this patch is intended to be relatively minimal.

Link: https://lkml.kernel.org/r/20201025212949.602194-1-kent.overstreet@gmail.com
Link: https://lkml.kernel.org/r/20201025212949.602194-2-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9cc7e96a
Both diff hunks replace the kernel-doc comment and body of the old monolithic generic_file_buffered_read(); only the new side of the diff (the code added by this patch) is shown below.

@@ -2166,112 +2166,160 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
	ra->ra_pages /= 4;
}

static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
{
	if (iocb->ki_flags & IOCB_WAITQ)
		return lock_page_async(page, iocb->ki_waitq);
	else if (iocb->ki_flags & IOCB_NOWAIT)
		return trylock_page(page) ? 0 : -EAGAIN;
	else
		return lock_page_killable(page);
}

static int generic_file_buffered_read_page_ok(struct kiocb *iocb,
			struct iov_iter *iter,
			struct page *page)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &iocb->ki_filp->f_ra;
	unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
	unsigned int bytes, copied;
	loff_t isize, end_offset;

	BUG_ON(iocb->ki_pos >> PAGE_SHIFT != page->index);

	/*
	 * i_size must be checked after we know the page is Uptodate.
	 *
	 * Checking i_size after the check allows us to calculate
	 * the correct value for "bytes", which means the zero-filled
	 * part of the page is not copied back to userspace (unless
	 * another truncate extends the file - this is desired though).
	 */

	isize = i_size_read(inode);
	if (unlikely(iocb->ki_pos >= isize))
		return 1;

	end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);

	bytes = min_t(loff_t, end_offset - iocb->ki_pos, PAGE_SIZE - offset);

	/* If users can be writing to this page using arbitrary
	 * virtual addresses, take care about potential aliasing
	 * before reading the page on the kernel side.
	 */
	if (mapping_writably_mapped(mapping))
		flush_dcache_page(page);

	/*
	 * Ok, we have the page, and it's up-to-date, so
	 * now we can copy it to user space...
	 */

	copied = copy_page_to_iter(page, offset, bytes, iter);

	iocb->ki_pos += copied;

	/*
	 * When a sequential read accesses a page several times,
	 * only mark it as accessed the first time.
	 */
	if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
		mark_page_accessed(page);

	ra->prev_pos = iocb->ki_pos;

	if (copied < bytes)
		return -EFAULT;

	return !iov_iter_count(iter) || iocb->ki_pos == isize;
}

static struct page *
generic_file_buffered_read_readpage(struct kiocb *iocb,
				    struct file *filp,
				    struct address_space *mapping,
				    struct page *page)
{
	struct file_ra_state *ra = &filp->f_ra;
	int error;

	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
		unlock_page(page);
		put_page(page);
		return ERR_PTR(-EAGAIN);
	}

	/*
	 * A previous I/O error may have been due to temporary
	 * failures, eg. multipath errors.
	 * PG_error will be set again if readpage fails.
	 */
	ClearPageError(page);
	/* Start the actual read. The read will unlock the page. */
	error = mapping->a_ops->readpage(filp, page);

	if (unlikely(error)) {
		put_page(page);
		return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL;
	}

	if (!PageUptodate(page)) {
		error = lock_page_for_iocb(iocb, page);
		if (unlikely(error)) {
			put_page(page);
			return ERR_PTR(error);
		}
		if (!PageUptodate(page)) {
			if (page->mapping == NULL) {
				/*
				 * invalidate_mapping_pages got it
				 */
				unlock_page(page);
				put_page(page);
				return NULL;
			}
			unlock_page(page);
			shrink_readahead_size_eio(ra);
			put_page(page);
			return ERR_PTR(-EIO);
		}
		unlock_page(page);
	}

	return page;
}

static struct page *
generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
					   struct file *filp,
					   struct iov_iter *iter,
					   struct page *page,
					   loff_t pos, loff_t count)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	int error;

	/*
	 * See comment in do_read_cache_page on why
	 * wait_on_page_locked is used to avoid unnecessarily
	 * serialisations and why it's safe.
	 */
	if (iocb->ki_flags & IOCB_WAITQ) {
		error = wait_on_page_locked_async(page,
						iocb->ki_waitq);
	} else {
		error = wait_on_page_locked_killable(page);
	}
	if (unlikely(error)) {
		put_page(page);
		return ERR_PTR(error);
	}
	if (PageUptodate(page))
		return page;

	if (inode->i_blkbits == PAGE_SHIFT ||
			!mapping->a_ops->is_partially_uptodate)
@@ -2285,195 +2333,186 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
	if (!page->mapping)
		goto page_not_up_to_date_locked;
	if (!mapping->a_ops->is_partially_uptodate(page,
				pos & ~PAGE_MASK, count))
		goto page_not_up_to_date_locked;
	unlock_page(page);
	return page;

page_not_up_to_date:
	/* Get exclusive access to the page ... */
	error = lock_page_for_iocb(iocb, page);
	if (unlikely(error)) {
		put_page(page);
		return ERR_PTR(error);
	}

page_not_up_to_date_locked:
	/* Did it get truncated before we got the lock? */
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		return NULL;
	}

	/* Did somebody else fill it already? */
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
}

static struct page *
generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
					  struct iov_iter *iter)
{
	struct file *filp = iocb->ki_filp;
	struct address_space *mapping = filp->f_mapping;
	pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
	struct page *page;
	int error;

	if (iocb->ki_flags & IOCB_NOIO)
		return ERR_PTR(-EAGAIN);

	/*
	 * Ok, it wasn't cached, so we need to create a new
	 * page..
	 */
	page = page_cache_alloc(mapping);
	if (!page)
		return ERR_PTR(-ENOMEM);

	error = add_to_page_cache_lru(page, mapping, index,
				      mapping_gfp_constraint(mapping, GFP_KERNEL));
	if (error) {
		put_page(page);
		return error != -EEXIST ? ERR_PTR(error) : NULL;
	}

	return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
}

/**
 * generic_file_buffered_read - generic file read routine
 * @iocb: the iocb to read
 * @iter: data destination
 * @written: already copied
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 *
 * Return:
 * * total number of bytes copied, including those the were already @written
 * * negative error code if nothing was copied
 */
ssize_t generic_file_buffered_read(struct kiocb *iocb,
		struct iov_iter *iter, ssize_t written)
{
	struct file *filp = iocb->ki_filp;
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	size_t orig_count = iov_iter_count(iter);
	pgoff_t last_index;
	int error = 0;

	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
		return 0;
	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);

	last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;

	/*
	 * If we've already successfully copied some data, then we
	 * can no longer safely return -EIOCBQUEUED. Hence mark
	 * an async read NOWAIT at that point.
	 */
	if (written && (iocb->ki_flags & IOCB_WAITQ))
		iocb->ki_flags |= IOCB_NOWAIT;

	for (;;) {
		pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
		struct page *page;

		cond_resched();
find_page:
		if (fatal_signal_pending(current)) {
			error = -EINTR;
			goto out;
		}

		/*
		 * We can't return -EIOCBQUEUED once we've done some work, so
		 * ensure we don't block:
		 */
		if ((iocb->ki_flags & IOCB_WAITQ) &&
		    (written + orig_count - iov_iter_count(iter)))
			iocb->ki_flags |= IOCB_NOWAIT;

		page = find_get_page(mapping, index);
		if (!page) {
			if (iocb->ki_flags & IOCB_NOIO)
				goto would_block;
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL)) {
				page = generic_file_buffered_read_no_cached_page(iocb, iter);
				if (!page)
					goto find_page;
				if (IS_ERR(page)) {
					error = PTR_ERR(page);
					goto out;
				}
			}
		}
		if (PageReadahead(page)) {
			if (iocb->ki_flags & IOCB_NOIO) {
				put_page(page);
				goto out;
			}
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			if (iocb->ki_flags & IOCB_NOWAIT) {
				put_page(page);
				error = -EAGAIN;
				goto out;
			}

			page = generic_file_buffered_read_pagenotuptodate(iocb,
					filp, iter, page, iocb->ki_pos, iter->count);
			if (!page)
				goto find_page;
			if (IS_ERR(page)) {
				error = PTR_ERR(page);
				goto out;
			}
		}

		error = generic_file_buffered_read_page_ok(iocb, iter, page);
		put_page(page);

		if (error) {
			if (error > 0)
				error = 0;
			goto out;
		}
	}

would_block:
	error = -EAGAIN;
out:
	file_accessed(filp);
	written += orig_count - iov_iter_count(iter);

	return written ? written : error;
}
EXPORT_SYMBOL_GPL(generic_file_buffered_read);