Commit cae2de69 authored by Stefan Roesch, committed by Jens Axboe

iomap: Add async buffered write support

This adds async buffered write support to iomap.

This replaces the call to balance_dirty_pages_ratelimited() with a
call to balance_dirty_pages_ratelimited_flags(). This allows specifying
whether the write request is async or not.

In addition this also moves the above function call to the beginning of
the function. If the function call is at the end of the function and the
decision is made to throttle writes, then there is no request that
io-uring can wait on. By moving it to the beginning of the function, the
write request is not issued, but returns -EAGAIN instead. io-uring will
punt the request and process it in the io-worker.

By moving the function call to the beginning of the function, the write
throttling will happen one page later.
Signed-off-by: Stefan Roesch <shr@fb.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20220623175157.1715274-6-shr@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 9753b868
...@@ -559,6 +559,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ...@@ -559,6 +559,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
loff_t block_size = i_blocksize(iter->inode); loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size); loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size); loff_t block_end = round_up(pos + len, block_size);
unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio);
size_t from = offset_in_folio(folio, pos), to = from + len; size_t from = offset_in_folio(folio, pos), to = from + len;
size_t poff, plen; size_t poff, plen;
...@@ -567,6 +568,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ...@@ -567,6 +568,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
folio_clear_error(folio); folio_clear_error(folio);
iop = iomap_page_create(iter->inode, folio, iter->flags); iop = iomap_page_create(iter->inode, folio, iter->flags);
if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
return -EAGAIN;
do { do {
iomap_adjust_read_range(iter->inode, folio, &block_start, iomap_adjust_read_range(iter->inode, folio, &block_start,
...@@ -584,7 +587,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ...@@ -584,7 +587,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
return -EIO; return -EIO;
folio_zero_segments(folio, poff, from, to, poff + plen); folio_zero_segments(folio, poff, from, to, poff + plen);
} else { } else {
int status = iomap_read_folio_sync(block_start, folio, int status;
if (iter->flags & IOMAP_NOWAIT)
return -EAGAIN;
status = iomap_read_folio_sync(block_start, folio,
poff, plen, srcmap); poff, plen, srcmap);
if (status) if (status)
return status; return status;
...@@ -613,6 +621,9 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ...@@ -613,6 +621,9 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS; unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
int status = 0; int status = 0;
if (iter->flags & IOMAP_NOWAIT)
fgp |= FGP_NOWAIT;
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap) if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length); BUG_ON(pos + len > srcmap->offset + srcmap->length);
...@@ -632,7 +643,7 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, ...@@ -632,7 +643,7 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT, folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
fgp, mapping_gfp_mask(iter->inode->i_mapping)); fgp, mapping_gfp_mask(iter->inode->i_mapping));
if (!folio) { if (!folio) {
status = -ENOMEM; status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
goto out_no_page; goto out_no_page;
} }
if (pos + len > folio_pos(folio) + folio_size(folio)) if (pos + len > folio_pos(folio) + folio_size(folio))
...@@ -750,6 +761,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ...@@ -750,6 +761,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
loff_t pos = iter->pos; loff_t pos = iter->pos;
ssize_t written = 0; ssize_t written = 0;
long status = 0; long status = 0;
struct address_space *mapping = iter->inode->i_mapping;
unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
do { do {
struct folio *folio; struct folio *folio;
...@@ -762,6 +775,11 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ...@@ -762,6 +775,11 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
bytes = min_t(unsigned long, PAGE_SIZE - offset, bytes = min_t(unsigned long, PAGE_SIZE - offset,
iov_iter_count(i)); iov_iter_count(i));
again: again:
status = balance_dirty_pages_ratelimited_flags(mapping,
bdp_flags);
if (unlikely(status))
break;
if (bytes > length) if (bytes > length)
bytes = length; bytes = length;
...@@ -770,6 +788,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ...@@ -770,6 +788,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
* Otherwise there's a nasty deadlock on copying from the * Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked * same page as we're writing to, without it being marked
* up-to-date. * up-to-date.
*
* For async buffered writes the assumption is that the user
* page has already been faulted in. This can be optimized by
* faulting the user page.
*/ */
if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
status = -EFAULT; status = -EFAULT;
...@@ -781,7 +803,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ...@@ -781,7 +803,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
break; break;
page = folio_file_page(folio, pos >> PAGE_SHIFT); page = folio_file_page(folio, pos >> PAGE_SHIFT);
if (mapping_writably_mapped(iter->inode->i_mapping)) if (mapping_writably_mapped(mapping))
flush_dcache_page(page); flush_dcache_page(page);
copied = copy_page_from_iter_atomic(page, offset, bytes, i); copied = copy_page_from_iter_atomic(page, offset, bytes, i);
...@@ -806,8 +828,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) ...@@ -806,8 +828,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
pos += status; pos += status;
written += status; written += status;
length -= status; length -= status;
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
} while (iov_iter_count(i) && length); } while (iov_iter_count(i) && length);
return written ? written : status; return written ? written : status;
...@@ -825,6 +845,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, ...@@ -825,6 +845,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
}; };
int ret; int ret;
if (iocb->ki_flags & IOCB_NOWAIT)
iter.flags |= IOMAP_NOWAIT;
while ((ret = iomap_iter(&iter, ops)) > 0) while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i); iter.processed = iomap_write_iter(&iter, i);
if (iter.pos == iocb->ki_pos) if (iter.pos == iocb->ki_pos)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment