Commit e4c07ec8 authored by Linus Torvalds

Merge tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix io_uring based write-through after converting cifs to use the
   netfs library

 - Fix aio error handling when doing write-through via netfs library

 - Fix performance regression in iomap when used with non-large folio
   mappings

 - Fix signalfd error code

 - Remove obsolete comment in signalfd code

 - Fix async request indication in netfs_perform_write() by raising
   BDP_ASYNC when IOCB_NOWAIT is set

 - Yield swap device immediately to prevent spurious EBUSY errors

 - Don't cross a .backup mountpoint from backup volumes in afs to avoid
   infinite loops

 - Fix a race between umount and async request completion in 9p after 9p
   was converted to use the netfs library (a simplified userspace sketch of
   the reference-count-and-wait scheme it uses follows this list)

* tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  netfs, 9p: Fix race between umount and async request completion
  afs: Don't cross .backup mountpoint from backup volume
  swap: yield device immediately
  netfs: Fix setting of BDP_ASYNC from iocb flags
  signalfd: drop an obsolete comment
  signalfd: fix error return code
  iomap: fault in smaller chunks for non-large folio mappings
  filemap: add helper mapping_max_folio_size()
  netfs: Fix AIO error handling when doing write-through
  netfs: Fix io_uring based write-through
parents 1613e604 f89ea63f

fs/9p/vfs_inode.c
@@ -348,6 +348,7 @@ void v9fs_evict_inode(struct inode *inode)
         __le32 __maybe_unused version;
 
         if (!is_bad_inode(inode)) {
+                netfs_wait_for_outstanding_io(inode);
                 truncate_inode_pages_final(&inode->i_data);
 
                 version = cpu_to_le32(v9inode->qid.version);

fs/afs/inode.c
@@ -648,6 +648,7 @@ void afs_evict_inode(struct inode *inode)
         ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+        netfs_wait_for_outstanding_io(inode);
         truncate_inode_pages_final(&inode->i_data);
 
         afs_set_cache_aux(vnode, &aux);

fs/afs/mntpt.c
@@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
                 put_page(page);
                 if (ret < 0)
                         return ret;
+
+                /* Don't cross a backup volume mountpoint from a backup volume */
+                if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+                    ctx->type == AFSVL_BACKVOL)
+                        return -ENODEV;
         }
 
         return 0;

fs/iomap/buffered-io.c
@@ -898,11 +898,11 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
         loff_t length = iomap_length(iter);
-        size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
         loff_t pos = iter->pos;
         ssize_t total_written = 0;
         long status = 0;
         struct address_space *mapping = iter->inode->i_mapping;
+        size_t chunk = mapping_max_folio_size(mapping);
         unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
 
         do {

fs/netfs/buffered_write.c
@@ -181,7 +181,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
         struct folio *folio, *writethrough = NULL;
         enum netfs_how_to_modify howto;
         enum netfs_folio_trace trace;
-        unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+        unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
         ssize_t written = 0, ret, ret2;
         loff_t i_size, pos = iocb->ki_pos, from, to;
         size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;

fs/netfs/direct_write.c
@@ -12,7 +12,7 @@
 static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
 {
         struct inode *inode = wreq->inode;
-        unsigned long long end = wreq->start + wreq->len;
+        unsigned long long end = wreq->start + wreq->transferred;
 
         if (!wreq->error &&
             i_size_read(inode) < end) {

fs/netfs/objects.c
@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
                 }
         }
 
+        atomic_inc(&ctx->io_count);
         trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
         netfs_proc_add_rreq(rreq);
         netfs_stat(&netfs_n_rh_rreq);
@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
 {
         struct netfs_io_request *rreq =
                 container_of(work, struct netfs_io_request, work);
+        struct netfs_inode *ictx = netfs_inode(rreq->inode);
         unsigned int i;
 
         trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
                 }
                 kvfree(rreq->direct_bv);
         }
 
+        if (atomic_dec_and_test(&ictx->io_count))
+                wake_up_var(&ictx->io_count);
+
         call_rcu(&rreq->rcu, netfs_free_request_rcu);
 }

fs/netfs/write_collect.c
@@ -510,7 +510,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
          * stream has a gap that can be jumped.
          */
         if (notes & SOME_EMPTY) {
-                unsigned long long jump_to = wreq->start + wreq->len;
+                unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
 
                 for (s = 0; s < NR_IO_STREAMS; s++) {
                         stream = &wreq->io_streams[s];
@@ -690,10 +690,11 @@ void netfs_write_collection_worker(struct work_struct *work)
         wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
 
         if (wreq->iocb) {
-                wreq->iocb->ki_pos += wreq->transferred;
+                size_t written = min(wreq->transferred, wreq->len);
+                wreq->iocb->ki_pos += written;
                 if (wreq->iocb->ki_complete)
                         wreq->iocb->ki_complete(
-                                wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+                                wreq->iocb, wreq->error ? wreq->error : written);
                 wreq->iocb = VFS_PTR_POISON;
         }

fs/netfs/write_issue.c
@@ -254,7 +254,7 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
         stream->construct = NULL;
 
         if (subreq->start + subreq->len > wreq->start + wreq->submitted)
-                wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+                WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
         netfs_do_issue_write(stream, subreq);
 }
@@ -636,7 +636,12 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
         mutex_unlock(&ictx->wb_lock);
 
-        ret = wreq->error;
+        if (wreq->iocb) {
+                ret = -EIOCBQUEUED;
+        } else {
+                wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
+                ret = wreq->error;
+        }
         netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
         return ret;
 }

fs/signalfd.c
@@ -282,14 +282,10 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
                 if (IS_ERR(file)) {
                         put_unused_fd(ufd);
                         kfree(ctx);
-                        return ufd;
+                        return PTR_ERR(file);
                 }
                 file->f_mode |= FMODE_NOWAIT;
 
-                /*
-                 * When we call this, the initialization must be complete, since
-                 * anon_inode_getfd() will install the fd.
-                 */
                 fd_install(ufd, file);
         } else {
                 struct fd f = fdget(ufd);

fs/smb/client/cifsfs.c
@@ -431,6 +431,7 @@ cifs_free_inode(struct inode *inode)
 static void
 cifs_evict_inode(struct inode *inode)
 {
+        netfs_wait_for_outstanding_io(inode);
         truncate_inode_pages_final(&inode->i_data);
         if (inode->i_state & I_PINNING_NETFS_WB)
                 cifs_fscache_unuse_inode_cookie(inode, true);

include/linux/netfs.h
@@ -68,6 +68,7 @@ struct netfs_inode {
         loff_t remote_i_size;           /* Size of the remote file */
         loff_t zero_point;              /* Size after which we assume there's no data
                                          * on the server */
+        atomic_t io_count;              /* Number of outstanding reqs */
         unsigned long flags;
 #define NETFS_ICTX_ODIRECT     0        /* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED  1        /* I/O should not use the pagecache */
@@ -474,6 +475,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
         ctx->remote_i_size = i_size_read(&ctx->inode);
         ctx->zero_point = LLONG_MAX;
         ctx->flags = 0;
+        atomic_set(&ctx->io_count, 0);
 #if IS_ENABLED(CONFIG_FSCACHE)
         ctx->cache = NULL;
 #endif
@@ -517,4 +519,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
 #endif
 }
 
+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @ctx: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete. This is intended
+ * to be called from inode eviction routines. This makes sure that any
+ * resources held by those requests are cleaned up before we let the inode get
+ * cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+        struct netfs_inode *ictx = netfs_inode(inode);
+
+        wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
+}
+
 #endif /* _LINUX_NETFS_H */

include/linux/pagemap.h
@@ -346,6 +346,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
         m->gfp_mask = mask;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
@@ -372,6 +385,14 @@ static inline bool mapping_large_folio_support(struct address_space *mapping)
                 test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
+/* Return the maximum folio size for this pagecache mapping, in bytes. */
+static inline size_t mapping_max_folio_size(struct address_space *mapping)
+{
+        if (mapping_large_folio_support(mapping))
+                return PAGE_SIZE << MAX_PAGECACHE_ORDER;
+        return PAGE_SIZE;
+}
+
 static inline int filemap_nr_thps(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@ -530,19 +551,6 @@ static inline void *detach_page_private(struct page *page)
         return folio_detach_private(page_folio(page));
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size. I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER 8
-#endif
-
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
 #else

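As a rough check of the order arithmetic in the MAX_PAGECACHE_ORDER comment
above: the 4 KiB page size and PMD order of 9 used below are typical x86-64
values assumed purely for illustration, and PAGE_SZ, PMD_ORDER and
FALLBACK_ORDER are made-up names, not kernel macros.

#include <stdio.h>

#define PAGE_SZ         4096UL  /* assumed 4 KiB base page */
#define PMD_ORDER       9       /* MAX_PAGECACHE_ORDER with CONFIG_TRANSPARENT_HUGEPAGE */
#define FALLBACK_ORDER  8       /* MAX_PAGECACHE_ORDER without THP */

int main(void)
{
        /* mapping_max_folio_size() with large folio support and THP: 2048 KiB */
        printf("THP:             %lu KiB\n", (PAGE_SZ << PMD_ORDER) / 1024);
        /* with large folio support but no THP: 1024 KiB */
        printf("no THP:          %lu KiB\n", (PAGE_SZ << FALLBACK_ORDER) / 1024);
        /* without large folio support the helper just returns the page size: 4 KiB */
        printf("no large folios: %lu KiB\n", PAGE_SZ / 1024);
        return 0;
}
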
kernel/power/swap.c
@@ -1595,7 +1595,7 @@ int swsusp_check(bool exclusive)
 
 put:
                 if (error)
-                        fput(hib_resume_bdev_file);
+                        bdev_fput(hib_resume_bdev_file);
                 else
                         pr_debug("Image signature found, resuming\n");
         } else {