Commit 41d8e767 authored by David Howells

netfs: Implement a write-through caching option

Provide a flag whereby a filesystem may request that netfs_perform_write()
perform write-through caching.  This involves putting pages directly into
writeback rather than marking them dirty, and attaching them to a write
operation as we go.

Further, the writes being made are limited to the byte range being written
rather than to whole folios.  This can be used by cifs, for example, to
deal with strict byte-range locking.

This can't be used with content encryption as that may require expansion of
the write RPC beyond the write being made.

This doesn't affect writes via mmap - those are written back in the normal
way; similarly, failed writethrough writes are marked dirty and left to
writeback to retry.  Another option would be to simply invalidate them, but
the contents can be simultaneously accessed by read() and through mmap.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
parent 4a79616c
@@ -26,6 +26,8 @@ enum netfs_how_to_modify {
 	NETFS_FLUSH_CONTENT,		/* Flush incompatible content. */
 };
 
+static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);
+
 static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
 {
 	if (netfs_group && !folio_get_private(folio))
@@ -133,6 +135,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
 	struct netfs_inode *ctx = netfs_inode(inode);
+	struct writeback_control wbc = {
+		.sync_mode	= WB_SYNC_NONE,
+		.for_sync	= true,
+		.nr_to_write	= LONG_MAX,
+		.range_start	= iocb->ki_pos,
+		.range_end	= iocb->ki_pos + iter->count,
+	};
+	struct netfs_io_request *wreq = NULL;
 	struct netfs_folio *finfo;
 	struct folio *folio;
 	enum netfs_how_to_modify howto;
@@ -143,6 +153,30 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	bool maybe_trouble = false;
 
+	if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
+		     iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
+	    ) {
+		if (pos < i_size_read(inode)) {
+			ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+			if (ret < 0) {
+				goto out;
+			}
+		}
+
+		wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+
+		wreq = netfs_begin_writethrough(iocb, iter->count);
+		if (IS_ERR(wreq)) {
+			wbc_detach_inode(&wbc);
+			ret = PTR_ERR(wreq);
+			wreq = NULL;
+			goto out;
+		}
+		if (!is_sync_kiocb(iocb))
+			wreq->iocb = iocb;
+		wreq->cleanup = netfs_cleanup_buffered_write;
+	}
+
 	do {
 		size_t flen;
 		size_t offset;	/* Offset into pagecache folio */
@@ -315,7 +349,25 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 		}
 		written += copied;
 
-		folio_mark_dirty(folio);
+		if (likely(!wreq)) {
+			folio_mark_dirty(folio);
+		} else {
+			if (folio_test_dirty(folio))
+				/* Sigh. mmap. */
+				folio_clear_dirty_for_io(folio);
+			/* We make multiple writes to the folio... */
+			if (!folio_test_writeback(folio)) {
+				folio_wait_fscache(folio);
+				folio_start_writeback(folio);
+				folio_start_fscache(folio);
+				if (wreq->iter.count == 0)
+					trace_netfs_folio(folio, netfs_folio_trace_wthru);
+				else
+					trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
+			}
+			netfs_advance_writethrough(wreq, copied,
+						   offset + copied == flen);
+		}
 retry:
 		folio_unlock(folio);
 		folio_put(folio);
@@ -325,17 +377,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	} while (iov_iter_count(iter));
 
 out:
-	if (likely(written)) {
-		/* Flush and wait for a write that requires immediate synchronisation. */
-		if (iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) {
-			_debug("dsync");
-			ret = filemap_fdatawait_range(mapping, iocb->ki_pos,
-						      iocb->ki_pos + written);
-		}
-
-		iocb->ki_pos += written;
+	if (unlikely(wreq)) {
+		ret = netfs_end_writethrough(wreq, iocb);
+		wbc_detach_inode(&wbc);
+		if (ret == -EIOCBQUEUED)
+			return ret;
 	}
 
+	iocb->ki_pos += written;
 	_leave(" = %zd [%zd]", written, ret);
 	return written ? written : ret;
@@ -101,6 +101,9 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
  */
 int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
 		      enum netfs_write_trace what);
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end);
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb);
 
 /*
  * stats.c
@@ -30,6 +30,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
 	[NETFS_READPAGE]		= "RP",
 	[NETFS_READ_FOR_WRITE]		= "RW",
 	[NETFS_WRITEBACK]		= "WB",
+	[NETFS_WRITETHROUGH]		= "WT",
 	[NETFS_LAUNDER_WRITE]		= "LW",
 	[NETFS_UNBUFFERED_WRITE]	= "UW",
 	[NETFS_DIO_READ]		= "DR",
@@ -41,6 +41,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	rreq->i_size	= i_size_read(inode);
 	rreq->debug_id	= atomic_inc_return(&debug_ids);
 	INIT_LIST_HEAD(&rreq->subrequests);
+	INIT_WORK(&rreq->work, NULL);
 	refcount_set(&rreq->ref, 1);
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
@@ -386,3 +386,93 @@ int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
 				    TASK_UNINTERRUPTIBLE);
 	return wreq->error;
 }
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+	struct netfs_io_request *wreq;
+	struct file *file = iocb->ki_filp;
+
+	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
+				   NETFS_WRITETHROUGH);
+	if (IS_ERR(wreq))
+		return wreq;
+
+	trace_netfs_write(wreq, netfs_write_trace_writethrough);
+
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+	return wreq;
+}
+
+static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+	unsigned long long start;
+	size_t len;
+
+	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		return;
+
+	start = wreq->start + wreq->submitted;
+	len = wreq->iter.count - wreq->submitted;
+	if (!final) {
+		len /= wreq->wsize;	/* Round to number of maximum packets */
+		len *= wreq->wsize;
+	}
+
+	ictx->ops->create_write_requests(wreq, start, len);
+	wreq->submitted += len;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache.  Data has been copied into the pagecache that we need to append
+ * to the request.  If we've added more than wsize then we need to create a new
+ * subrequest.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
+{
+	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
+
+	wreq->iter.count += copied;
+	wreq->io_iter.count += copied;
+	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
+		netfs_submit_writethrough(wreq, false);
+
+	return wreq->error;
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ */
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
+{
+	int ret = -EIOCBQUEUED;
+
+	_enter("ic=%zu sb=%zu ws=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize);
+
+	if (wreq->submitted < wreq->io_iter.count)
+		netfs_submit_writethrough(wreq, true);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (is_sync_kiocb(iocb)) {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
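
For orientation, the order in which netfs_perform_write() drives these helpers
can be condensed into the sketch below.  This is an illustrative paraphrase,
not code from the patch: the pagecache copy, fscache marking and wbc handling
are elided, and the loop body simply consumes the iterator a page at a time.

/* Condensed sketch of the write-through flow in netfs_perform_write().
 * The helper prototypes live in fs/netfs/internal.h.
 */
#include "internal.h"

static ssize_t writethrough_flow_sketch(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_io_request *wreq;
	ssize_t written = 0;
	int ret;

	wreq = netfs_begin_writethrough(iocb, iov_iter_count(iter));
	if (IS_ERR(wreq))
		return PTR_ERR(wreq);

	while (iov_iter_count(iter)) {
		/* The real code copies up to a folio's worth of data into the
		 * pagecache and puts the folio into writeback; here we only
		 * account for the bytes consumed.
		 */
		size_t copied = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);

		iov_iter_advance(iter, copied);

		/* Append the span to the request; a subrequest is cut once at
		 * least wsize bytes have accumulated at a folio boundary.
		 */
		netfs_advance_writethrough(wreq, copied, true);
		written += copied;
	}

	/* Flush the tail and wait (sync), or return -EIOCBQUEUED (async). */
	ret = netfs_end_writethrough(wreq, iocb);
	return ret < 0 ? ret : written;
}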
@@ -139,6 +139,7 @@ struct netfs_inode {
 	unsigned long		flags;
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
+#define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
 };
 
 /*
@@ -227,6 +228,7 @@ enum netfs_io_origin {
 	NETFS_READPAGE,			/* This read is a synchronous read */
 	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
 	NETFS_WRITEBACK,		/* This write was triggered by writepages */
+	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
 	NETFS_LAUNDER_WRITE,		/* This is triggered by ->launder_folio() */
 	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
 	NETFS_DIO_READ,			/* This is a direct I/O read */
@@ -27,13 +27,15 @@
 	EM(netfs_write_trace_dio_write,		"DIO-WRITE")	\
 	EM(netfs_write_trace_launder,		"LAUNDER  ")	\
 	EM(netfs_write_trace_unbuffered_write,	"UNB-WRITE")	\
-	E_(netfs_write_trace_writeback,		"WRITEBACK")
+	EM(netfs_write_trace_writeback,		"WRITEBACK")	\
+	E_(netfs_write_trace_writethrough,	"WRITETHRU")
 
 #define netfs_rreq_origins \
 	EM(NETFS_READAHEAD,			"RA") \
 	EM(NETFS_READPAGE,			"RP") \
 	EM(NETFS_READ_FOR_WRITE,		"RW") \
 	EM(NETFS_WRITEBACK,			"WB") \
+	EM(NETFS_WRITETHROUGH,			"WT") \
 	EM(NETFS_LAUNDER_WRITE,			"LW") \
 	EM(NETFS_UNBUFFERED_WRITE,		"UW") \
 	EM(NETFS_DIO_READ,			"DR") \
@@ -136,7 +138,9 @@
 	EM(netfs_folio_trace_redirty,		"redirty")	\
 	EM(netfs_folio_trace_redirtied,		"redirtied")	\
 	EM(netfs_folio_trace_store,		"store")	\
-	E_(netfs_folio_trace_store_plus,	"store+")
+	EM(netfs_folio_trace_store_plus,	"store+")	\
+	EM(netfs_folio_trace_wthru,		"wthru")	\
+	E_(netfs_folio_trace_wthru_plus,	"wthru+")
 
 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY