Commit e4c07ec8 authored by Linus Torvalds

Merge tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix io_uring based write-through after converting cifs to use the
   netfs library

 - Fix aio error handling when doing write-through via netfs library

 - Fix performance regression in iomap when used with non-large folio
   mappings

 - Fix signalfd error code

 - Remove obsolete comment in signalfd code

 - Fix async request indication in netfs_perform_write() by raising
   BDP_ASYNC when IOCB_NOWAIT is set

 - Yield swap device immediately to prevent spurious EBUSY errors

 - Don't cross a .backup mountpoint from backup volumes in afs to avoid
   infinite loops

 - Fix a race between umount and async request completion in 9p after 9p
   was converted to use the netfs library

* tag 'vfs-6.10-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  netfs, 9p: Fix race between umount and async request completion
  afs: Don't cross .backup mountpoint from backup volume
  swap: yield device immediately
  netfs: Fix setting of BDP_ASYNC from iocb flags
  signalfd: drop an obsolete comment
  signalfd: fix error return code
  iomap: fault in smaller chunks for non-large folio mappings
  filemap: add helper mapping_max_folio_size()
  netfs: Fix AIO error handling when doing write-through
  netfs: Fix io_uring based write-through
parents 1613e604 f89ea63f
......@@ -348,6 +348,7 @@ void v9fs_evict_inode(struct inode *inode)
__le32 __maybe_unused version;
if (!is_bad_inode(inode)) {
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
version = cpu_to_le32(v9inode->qid.version);
......
......@@ -648,6 +648,7 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
afs_set_cache_aux(vnode, &aux);
......
......@@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
put_page(page);
if (ret < 0)
return ret;
/* Don't cross a backup volume mountpoint from a backup volume */
if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
ctx->type == AFSVL_BACKVOL)
return -ENODEV;
}
return 0;
......
......@@ -898,11 +898,11 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
loff_t length = iomap_length(iter);
size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
loff_t pos = iter->pos;
ssize_t total_written = 0;
long status = 0;
struct address_space *mapping = iter->inode->i_mapping;
size_t chunk = mapping_max_folio_size(mapping);
unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
do {
......
......@@ -181,7 +181,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio, *writethrough = NULL;
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
......
......@@ -12,7 +12,7 @@
static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
{
struct inode *inode = wreq->inode;
unsigned long long end = wreq->start + wreq->len;
unsigned long long end = wreq->start + wreq->transferred;
if (!wreq->error &&
i_size_read(inode) < end) {
......
......@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
}
atomic_inc(&ctx->io_count);
trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
......@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
......@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
if (atomic_dec_and_test(&ictx->io_count))
wake_up_var(&ictx->io_count);
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
......
......@@ -510,7 +510,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
* stream has a gap that can be jumped.
*/
if (notes & SOME_EMPTY) {
unsigned long long jump_to = wreq->start + wreq->len;
unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
......@@ -690,10 +690,11 @@ void netfs_write_collection_worker(struct work_struct *work)
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
if (wreq->iocb) {
wreq->iocb->ki_pos += wreq->transferred;
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
if (wreq->iocb->ki_complete)
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
wreq->iocb, wreq->error ? wreq->error : written);
wreq->iocb = VFS_PTR_POISON;
}
......
......@@ -254,7 +254,7 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
stream->construct = NULL;
if (subreq->start + subreq->len > wreq->start + wreq->submitted)
wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
netfs_do_issue_write(stream, subreq);
}
......@@ -636,7 +636,12 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
mutex_unlock(&ictx->wb_lock);
ret = wreq->error;
if (wreq->iocb) {
ret = -EIOCBQUEUED;
} else {
wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
ret = wreq->error;
}
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
return ret;
}
......
......@@ -282,14 +282,10 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
if (IS_ERR(file)) {
put_unused_fd(ufd);
kfree(ctx);
return ufd;
return PTR_ERR(file);
}
file->f_mode |= FMODE_NOWAIT;
/*
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
fd_install(ufd, file);
} else {
struct fd f = fdget(ufd);
......
......@@ -431,6 +431,7 @@ cifs_free_inode(struct inode *inode)
static void
cifs_evict_inode(struct inode *inode)
{
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode->i_state & I_PINNING_NETFS_WB)
cifs_fscache_unuse_inode_cookie(inode, true);
......
......@@ -68,6 +68,7 @@ struct netfs_inode {
loff_t remote_i_size; /* Size of the remote file */
loff_t zero_point; /* Size after which we assume there's no data
* on the server */
atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
......@@ -474,6 +475,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
ctx->remote_i_size = i_size_read(&ctx->inode);
ctx->zero_point = LLONG_MAX;
ctx->flags = 0;
atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
......@@ -517,4 +519,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
#endif
}
/**
 * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
 * @inode: The netfs inode to wait on
 *
 * Wait for outstanding I/O requests of any type to complete.  This is intended
 * to be called from inode eviction routines.  This makes sure that any
 * resources held by those requests are cleaned up before we let the inode get
 * cleaned up.
 *
 * Blocks until the io_count of the inode's netfs context reads zero.
 */
static inline void netfs_wait_for_outstanding_io(struct inode *inode)
{
struct netfs_inode *ictx = netfs_inode(inode);
/* The address of io_count is the wait key; its value is the condition. */
wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
}
#endif /* _LINUX_NETFS_H */
......@@ -346,6 +346,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
m->gfp_mask = mask;
}
/*
* There are some parts of the kernel which assume that PMD entries
* are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
* limit the maximum allocation order to PMD size. I'm not aware of any
* assumptions about maximum order if THP are disabled, but 8 seems like
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER 8
#endif
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
* @mapping: The file.
......@@ -372,6 +385,14 @@ static inline bool mapping_large_folio_support(struct address_space *mapping)
test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
/*
 * mapping_max_folio_size - largest folio, in bytes, for a pagecache mapping.
 * @mapping: The pagecache mapping to query.
 *
 * A mapping with large folio support is bounded by a folio of order
 * MAX_PAGECACHE_ORDER; any other mapping is bounded by a single page.
 */
static inline size_t mapping_max_folio_size(struct address_space *mapping)
{
	return mapping_large_folio_support(mapping) ?
		PAGE_SIZE << MAX_PAGECACHE_ORDER : PAGE_SIZE;
}
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
......@@ -530,19 +551,6 @@ static inline void *detach_page_private(struct page *page)
return folio_detach_private(page_folio(page));
}
/*
* There are some parts of the kernel which assume that PMD entries
* are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
* limit the maximum allocation order to PMD size. I'm not aware of any
* assumptions about maximum order if THP are disabled, but 8 seems like
* a good order (that's 1MB if you're using 4kB pages)
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
#else
#define MAX_PAGECACHE_ORDER 8
#endif
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
#else
......
......@@ -1595,7 +1595,7 @@ int swsusp_check(bool exclusive)
put:
if (error)
fput(hib_resume_bdev_file);
bdev_fput(hib_resume_bdev_file);
else
pr_debug("Image signature found, resuming\n");
} else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment