Commit 3e9bff3b authored by Linus Torvalds

Merge tag 'vfs-6.11-rc6.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "VFS:

   - Ensure that backing files use file->f_op->splice_write() for
     splice

  netfs:

   - Revert the removal of PG_private_2 from netfs_release_folio() as
     cephfs still relies on this

   - When AS_RELEASE_ALWAYS is set on a mapping, the folio must always
     be invalidated during truncation

   - Fix losing untruncated data in a folio by letting
     netfs_release_folio() return false if the folio is dirty

   - Fix trimming of streaming-write folios in netfs_inval_folio()

   - Reset iterator before retrying a short read

   - Fix interaction of streaming writes with zero-point tracker

  afs:

   - During truncation afs currently calls truncate_setsize() which sets
     i_size, expands the pagecache and truncates it. The first two
     operations aren't needed because they will have already been done.
     So call truncate_pagecache() instead and skip the redundant parts

  overlayfs:

   - Fix checking of the number of allowed lower layers so 500 layers
     can actually be used instead of just 499

   - Add missing '\n' to pr_err() output

   - Pass string to ovl_parse_layer() and thus allow it to be used for
     Opt_lowerdir as well

  pidfd:

   - Revert blocking the creation of pidfds for kthreads as apparently
     userspace relies on this. Specifically, it breaks systemd during
     shutdown

  romfs:

   - Fix romfs_read_folio() to use the correct offset with
     folio_zero_tail()"

* tag 'vfs-6.11-rc6.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  netfs: Fix interaction of streaming writes with zero-point tracker
  netfs: Fix missing iterator reset on retry of short read
  netfs: Fix trimming of streaming-write folios in netfs_inval_folio()
  netfs: Fix netfs_release_folio() to say no if folio dirty
  afs: Fix post-setattr file edit to do truncation correctly
  mm: Fix missing folio invalidation calls during truncation
  ovl: ovl_parse_param_lowerdir: Add missed '\n' for pr_err
  ovl: fix wrong lowerdir number check for parameter Opt_lowerdir
  ovl: pass string to ovl_parse_layer()
  backing-file: convert to using fops->splice_write
  Revert "pidfd: prevent creation of pidfds for kthreads"
  romfs: fix romfs_read_folio()
  netfs, ceph: Partially revert "netfs: Replace PG_fscache by setting folio->private and marking dirty"
parents 5be63fc1 e00e99ba
@@ -695,13 +695,18 @@ static void afs_setattr_edit_file(struct afs_operation *op)
 {
 	struct afs_vnode_param *vp = &op->file[0];
 	struct afs_vnode *vnode = vp->vnode;
+	struct inode *inode = &vnode->netfs.inode;
 
 	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
 		loff_t size = op->setattr.attr->ia_size;
-		loff_t i_size = op->setattr.old_i_size;
+		loff_t old = op->setattr.old_i_size;
+
+		/* Note: inode->i_size was updated by afs_apply_status() inside
+		 * the I/O and callback locks.
+		 */
 
-		if (size != i_size) {
-			truncate_setsize(&vnode->netfs.inode, size);
+		if (size != old) {
+			truncate_pagecache(inode, size);
 			netfs_resize_file(&vnode->netfs, size, true);
 			fscache_resize_cookie(afs_vnode_cache(vnode), size);
 		}
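For reference, truncate_setsize() is roughly the following (paraphrased from mm/truncate.c and trimmed for illustration). Since afs_apply_status() has already written i_size under the I/O and callback locks, calling it here would redo the first two steps, which is why the edit now calls truncate_pagecache() directly:

void truncate_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize = inode->i_size;

	i_size_write(inode, newsize);		/* afs: already done by afs_apply_status() */
	if (newsize > oldsize)
		pagecache_isize_extended(inode, oldsize, newsize);
	truncate_pagecache(inode, newsize);	/* afs: the only step still needed */
}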
@@ -303,13 +303,16 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
 	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
 		return -EIO;
 
+	if (!out->f_op->splice_write)
+		return -EINVAL;
+
 	ret = file_remove_privs(ctx->user_file);
 	if (ret)
 		return ret;
 
 	old_cred = override_creds(ctx->cred);
 	file_start_write(out);
-	ret = iter_file_splice_write(pipe, out, ppos, len, flags);
+	ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
 	file_end_write(out);
 	revert_creds(old_cred);
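The old code hardwired iter_file_splice_write(), bypassing whatever splice_write method the backing file's own filesystem defines; and because ->splice_write may be NULL, the dispatch needs the new -EINVAL check. A minimal sketch of the pattern (hypothetical helper, for illustration only):

static ssize_t splice_write_dispatch(struct pipe_inode_info *pipe,
				     struct file *out, loff_t *ppos,
				     size_t len, unsigned int flags)
{
	/* Prefer the file's own method; fail cleanly when it is absent. */
	if (!out->f_op->splice_write)
		return -EINVAL;
	return out->f_op->splice_write(pipe, out, ppos, len, flags);
}

For a file whose splice_write is iter_file_splice_write this behaves as before; for a backing file belonging to a filesystem with its own implementation, that method now runs.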
@@ -695,6 +695,7 @@ void ceph_evict_inode(struct inode *inode)
 	percpu_counter_dec(&mdsc->metric.total_inodes);
 
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 	if (inode->i_state & I_PINNING_NETFS_WB)
 		ceph_fscache_unuse_cookie(inode, true);
@@ -313,6 +313,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
 			netfs_reset_subreq_iter(rreq, subreq);
 			netfs_read_from_server(rreq, subreq);
 		} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
+			netfs_reset_subreq_iter(rreq, subreq);
 			netfs_rreq_short_read(rreq, subreq);
 		}
 	}
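The retry path needs the subrequest's iterator rewound to match the bytes actually transferred before the short read is reissued; otherwise the follow-up read lands wherever the previous attempt left the iterator. A loose userspace model of the failure mode (all names made up):

#include <stdio.h>
#include <string.h>

struct cursor {
	char	*buf;
	size_t	pos;		/* advances as data is copied in */
};

static void receive(struct cursor *c, const char *data, size_t n)
{
	memcpy(c->buf + c->pos, data, n);
	c->pos += n;
}

int main(void)
{
	char buf[16] = {0};
	struct cursor c = { .buf = buf, .pos = 0 };
	size_t transferred = 3;

	receive(&c, "abcXXX", 6);	/* short read: only 3 bytes counted */

	/* The fix, loosely: reset the cursor to the transferred count
	 * before retrying, instead of continuing from pos == 6.
	 */
	c.pos = transferred;
	receive(&c, "def", 3);

	printf("%s\n", buf);		/* "abcdef" */
	return 0;
}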
@@ -97,10 +97,22 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
 void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
 {
 	struct netfs_folio *finfo;
+	struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
 	size_t flen = folio_size(folio);
 
 	_enter("{%lx},%zx,%zx", folio->index, offset, length);
 
+	if (offset == 0 && length == flen) {
+		unsigned long long i_size = i_size_read(&ctx->inode);
+		unsigned long long fpos = folio_pos(folio), end;
+
+		end = umin(fpos + flen, i_size);
+		if (fpos < i_size && end > ctx->zero_point)
+			ctx->zero_point = end;
+	}
+
+	folio_wait_private_2(folio); /* [DEPRECATED] */
+
 	if (!folio_test_private(folio))
 		return;
@@ -113,18 +125,34 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
 		/* We have a partially uptodate page from a streaming write. */
 		unsigned int fstart = finfo->dirty_offset;
 		unsigned int fend = fstart + finfo->dirty_len;
-		unsigned int end = offset + length;
+		unsigned int iend = offset + length;
 
 		if (offset >= fend)
 			return;
-		if (end <= fstart)
+		if (iend <= fstart)
 			return;
-		if (offset <= fstart && end >= fend)
-			goto erase_completely;
-		if (offset <= fstart && end > fstart)
-			goto reduce_len;
-		if (offset > fstart && end >= fend)
-			goto move_start;
+
+		/* The invalidation region overlaps the data.  If the region
+		 * covers the start of the data, we either move along the start
+		 * or just erase the data entirely.
+		 */
+		if (offset <= fstart) {
+			if (iend >= fend)
+				goto erase_completely;
+			/* Move the start of the data. */
+			finfo->dirty_len = fend - iend;
+			finfo->dirty_offset = offset;
+			return;
+		}
+
+		/* Reduce the length of the data if the invalidation region
+		 * covers the tail part.
+		 */
+		if (iend >= fend) {
+			finfo->dirty_len = offset - fstart;
+			return;
+		}
 
 		/* A partial write was split.  The caller has already zeroed
		 * it, so just absorb the hole.
		 */
@@ -137,12 +165,6 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
 	folio_clear_uptodate(folio);
 	kfree(finfo);
 	return;
-reduce_len:
-	finfo->dirty_len = offset + length - finfo->dirty_offset;
-	return;
-move_start:
-	finfo->dirty_len -= offset - finfo->dirty_offset;
-	finfo->dirty_offset = offset;
 }
 EXPORT_SYMBOL(netfs_invalidate_folio);
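The rewritten trimming cases reduce to interval arithmetic on the dirty region [fstart, fend) against the invalidated region [offset, iend). A standalone model of that logic (illustrative only; field names follow struct netfs_folio loosely, and the data remaining after a head trim starts at iend):

#include <stdbool.h>
#include <stdio.h>

/* Dirty data occupies [*start, *start + *len) within a folio and
 * [offset, iend) is being invalidated.  Returns false when the dirty
 * record should be erased completely.
 */
static bool trim_dirty(unsigned int *start, unsigned int *len,
		       unsigned int offset, unsigned int iend)
{
	unsigned int fstart = *start, fend = *start + *len;

	if (offset >= fend || iend <= fstart)
		return true;		/* no overlap: keep as-is */
	if (offset <= fstart) {
		if (iend >= fend)
			return false;	/* fully covered: erase */
		*start = iend;		/* trim the head */
		*len = fend - iend;
		return true;
	}
	if (iend >= fend)
		*len = offset - fstart;	/* trim the tail */
	/* else: interior hole, already zeroed by the caller; absorb it */
	return true;
}

int main(void)
{
	unsigned int start = 4096, len = 8192;

	if (trim_dirty(&start, &len, 0, 8192))	/* invalidate [0, 8192) */
		printf("dirty now [%u, %u)\n", start, start + len);
	return 0;
}

For example, with dirty data at [4096, 12288) and an invalidation of [0, 8192), the record becomes [8192, 12288).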
@@ -159,12 +181,20 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
 	struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
 	unsigned long long end;
 
-	end = folio_pos(folio) + folio_size(folio);
+	if (folio_test_dirty(folio))
+		return false;
+
+	end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode));
 	if (end > ctx->zero_point)
 		ctx->zero_point = end;
 
 	if (folio_test_private(folio))
 		return false;
+	if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */
+		if (current_is_kswapd() || !(gfp & __GFP_FS))
+			return false;
+		folio_wait_private_2(folio);
+	}
 	fscache_note_page_release(netfs_i_cookie(ctx));
 	return true;
 }
@@ -33,6 +33,7 @@
 int netfs_folio_written_back(struct folio *folio)
 {
 	enum netfs_folio_trace why = netfs_folio_trace_clear;
+	struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
 	struct netfs_folio *finfo;
 	struct netfs_group *group = NULL;
 	int gcount = 0;
@@ -41,6 +42,12 @@ int netfs_folio_written_back(struct folio *folio)
 		/* Streaming writes cannot be redirtied whilst under writeback,
 		 * so discard the streaming record.
 		 */
+		unsigned long long fend;
+
+		fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
+		if (fend > ictx->zero_point)
+			ictx->zero_point = fend;
+
 		folio_detach_private(folio);
 		group = finfo->netfs_group;
 		gcount++;
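Taken together, the netfs changes above maintain one invariant around ctx->zero_point: it is the offset beyond which the server is assumed to hold no data, so reads past it can be synthesized as zeros without a round trip. Whenever locally cached data up to some offset stops being visible in the pagecache (invalidated, released, or completing writeback), zero_point must be raised to cover it, or a later read would wrongly see zeros. A toy model of the tracker (names are illustrative):

#include <stdio.h>

struct tracker {
	unsigned long long zero_point;	/* no server data assumed past here */
};

/* Called whenever data at [..., end) leaves local visibility. */
static void note_data_to(struct tracker *t, unsigned long long end)
{
	if (end > t->zero_point)
		t->zero_point = end;
}

int main(void)
{
	struct tracker t = { .zero_point = 0 };

	/* A streaming write dirtied bytes [4096, 8192) and is now being
	 * written back: the server will hold data up to 8192.
	 */
	note_data_to(&t, 8192);
	printf("zero_point = %llu\n", t.zero_point);	/* 8192 */
	return 0;
}

In netfs_release_folio() the offset is additionally clamped to i_size via umin(), since a folio tail beyond EOF implies no server data.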
@@ -353,6 +353,8 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
 	case Opt_datadir_add:
 		ctx->nr_data++;
 		fallthrough;
+	case Opt_lowerdir:
+		fallthrough;
 	case Opt_lowerdir_add:
 		WARN_ON(ctx->nr >= ctx->capacity);
 		l = &ctx->lower[ctx->nr++];
@@ -365,10 +367,9 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
 	}
 }
 
-static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
-			   enum ovl_opt layer)
+static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum ovl_opt layer)
 {
-	char *name = kstrdup(param->string, GFP_KERNEL);
+	char *name = kstrdup(layer_name, GFP_KERNEL);
 	bool upper = (layer == Opt_upperdir || layer == Opt_workdir);
 	struct path path;
 	int err;
@@ -376,7 +377,7 @@ static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
 	if (!name)
 		return -ENOMEM;
 
-	if (upper)
+	if (upper || layer == Opt_lowerdir)
 		err = ovl_mount_dir(name, &path);
 	else
 		err = ovl_mount_dir_noesc(name, &path);
@@ -432,7 +433,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 {
 	int err;
 	struct ovl_fs_context *ctx = fc->fs_private;
-	struct ovl_fs_context_layer *l;
 	char *dup = NULL, *iter;
 	ssize_t nr_lower, nr;
 	bool data_layer = false;
@@ -449,7 +449,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 		return 0;
 
 	if (*name == ':') {
-		pr_err("cannot append lower layer");
+		pr_err("cannot append lower layer\n");
 		return -EINVAL;
 	}
@@ -472,35 +472,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 		goto out_err;
 	}
 
-	if (nr_lower > ctx->capacity) {
-		err = -ENOMEM;
-		l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower),
-				   GFP_KERNEL_ACCOUNT);
-		if (!l)
-			goto out_err;
-
-		ctx->lower = l;
-		ctx->capacity = nr_lower;
-	}
-
 	iter = dup;
-	l = ctx->lower;
-	for (nr = 0; nr < nr_lower; nr++, l++) {
-		ctx->nr++;
-		memset(l, 0, sizeof(*l));
-
-		err = ovl_mount_dir(iter, &l->path);
+	for (nr = 0; nr < nr_lower; nr++) {
+		err = ovl_parse_layer(fc, iter, Opt_lowerdir);
 		if (err)
-			goto out_put;
-
-		err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false);
-		if (err)
-			goto out_put;
-
-		err = -ENOMEM;
-		l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT);
-		if (!l->name)
-			goto out_put;
+			goto out_err;
 
 		if (data_layer)
 			ctx->nr_data++;
@@ -517,8 +493,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 			 * there are no data layers.
 			 */
 			if (ctx->nr_data > 0) {
-				pr_err("regular lower layers cannot follow data lower layers");
-				goto out_put;
+				pr_err("regular lower layers cannot follow data lower layers\n");
+				goto out_err;
 			}
 			data_layer = false;
@@ -532,9 +508,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
 	kfree(dup);
 	return 0;
 
-out_put:
-	ovl_reset_lowerdirs(ctx);
-
 out_err:
 	kfree(dup);
@@ -582,7 +555,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_datadir_add:
 	case Opt_upperdir:
 	case Opt_workdir:
-		err = ovl_parse_layer(fc, param, opt);
+		err = ovl_parse_layer(fc, param->string, opt);
 		break;
 	case Opt_default_permissions:
 		config->default_permissions = true;
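With Opt_lowerdir now funneled through ovl_parse_layer() like the other layer options, the lowerdir-specific work left in ovl_parse_param_lowerdir() is splitting the legacy colon-separated string: each unescaped ':' ends a layer, while "\:" stays part of the path (hence ovl_mount_dir(), which unescapes, for Opt_lowerdir). A sketch of that walk (helper name made up, counting only):

#include <stdio.h>
#include <string.h>

static size_t count_layers(const char *s)
{
	size_t n = *s ? 1 : 0;

	for (; *s; s++) {
		if (*s == '\\' && s[1])
			s++;			/* skip escaped character */
		else if (*s == ':')
			n++;
	}
	return n;
}

int main(void)
{
	/* Three layers; the middle path contains escaped colons. */
	printf("%zu\n", count_layers("/l1:/dir\\:with\\:colons:/l2"));
	return 0;
}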
@@ -126,7 +126,7 @@ static int romfs_read_folio(struct file *file, struct folio *folio)
 		}
 	}
 
-	buf = folio_zero_tail(folio, fillsize, buf);
+	buf = folio_zero_tail(folio, fillsize, buf + fillsize);
 	kunmap_local(buf);
 	folio_end_read(folio, ret == 0);
 	return ret;
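folio_zero_tail(folio, offset, kaddr) zeroes from offset to the end of the folio and expects kaddr to point at the byte at that offset, returning the address to hand back to kunmap_local(); romfs was passing the base of the mapping instead, so the zeroing started at the wrong place. A sketch of the corrected pattern (illustrative wrapper, not the romfs code itself):

#include <linux/highmem.h>

/* After copying @fillsize bytes into a folio mapped at @buf, zero the
 * rest.  The third argument must address the first byte to be zeroed,
 * hence buf + fillsize rather than buf.
 */
static void zero_unfilled_tail(struct folio *folio, void *buf, size_t fillsize)
{
	buf = folio_zero_tail(folio, fillsize, buf + fillsize);
	kunmap_local(buf);
}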
@@ -2053,23 +2053,10 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
  */
 int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 {
-	if (!pid)
-		return -EINVAL;
-
-	scoped_guard(rcu) {
-		struct task_struct *tsk;
-
-		if (flags & PIDFD_THREAD)
-			tsk = pid_task(pid, PIDTYPE_PID);
-		else
-			tsk = pid_task(pid, PIDTYPE_TGID);
-		if (!tsk)
-			return -EINVAL;
-
-		/* Don't create pidfds for kernel threads for now. */
-		if (tsk->flags & PF_KTHREAD)
-			return -EINVAL;
-	}
+	bool thread = flags & PIDFD_THREAD;
 
+	if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
+		return -EINVAL;
 	return __pidfd_prepare(pid, flags, ret);
 }
@@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process(
 	if (clone_flags & CLONE_PIDFD) {
 		int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
 
-		/* Don't create pidfds for kernel threads for now. */
-		if (args->kthread) {
-			retval = -EINVAL;
-			goto bad_fork_free_pid;
-		}
-
 		/* Note that no task has been attached to @pid yet. */
 		retval = __pidfd_prepare(pid, flags, &pidfile);
 		if (retval < 0)
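After the revert, pidfds can once again be created for kernel threads, which is what systemd's shutdown path depends on. This can be checked from userspace (PID 2 being kthreadd is a common convention, not an ABI guarantee, so treat it as an assumption):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* With the revert in place this succeeds again; with the
	 * blocked behaviour it failed with EINVAL.
	 */
	int fd = syscall(SYS_pidfd_open, 2, 0);

	if (fd < 0)
		perror("pidfd_open(kthreadd)");
	else
		printf("got pidfd %d for PID 2\n", fd);
	return 0;
}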
@@ -157,7 +157,7 @@ static void truncate_cleanup_folio(struct folio *folio)
 	if (folio_mapped(folio))
 		unmap_mapping_folio(folio);
 
-	if (folio_has_private(folio))
+	if (folio_needs_release(folio))
 		folio_invalidate(folio, 0, folio_size(folio));
 
 	/*
@@ -219,7 +219,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
 	if (!mapping_inaccessible(folio->mapping))
 		folio_zero_range(folio, offset, length);
 
-	if (folio_has_private(folio))
+	if (folio_needs_release(folio))
 		folio_invalidate(folio, offset, length);
 
 	if (!folio_test_large(folio))
 		return true;
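The difference matters because folio_has_private() only checks the folio's private flags, whereas folio_needs_release() (from mm/internal.h, roughly as below) also fires for mappings flagged AS_RELEASE_ALWAYS, which netfs/ceph set in order to receive invalidation callbacks even when folio->private is unset:

static inline bool folio_needs_release(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	return folio_has_private(folio) ||
	       (mapping && mapping_release_always(mapping));
}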