Commit 84ab1277 authored by Linus Torvalds

Merge tag 'v6.6-vfs.fs_context' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull mount API updates from Christian Brauner:
 "This introduces FSCONFIG_CMD_CREATE_EXCL which allows userspace to
  implement something like

      $ mount -t ext4 --exclusive /dev/sda /B

  which fails if a superblock for the requested filesystem already
  exists, instead of silently reusing the existing superblock.

  Without it, in the sequence

      $ move-mount -f xfs -o       source=/dev/sda4 /A
      $ move-mount -f xfs -o noacl,source=/dev/sda4 /B

  the initial mounter will create a superblock. The second mounter will
  reuse the existing superblock, creating a bind-mount (see [1] for the
  source of the move-mount binary).

  The problem is that reusing an existing superblock means all mount
  options other than read-only and read-write will be silently ignored
  even if they are incompatible requests. For example, the second mount
  has requested no POSIX ACL support but since the existing superblock
  is reused POSIX ACL support will remain enabled.

  Such silent superblock reuse can easily become a security issue.

  After adding support for FSCONFIG_CMD_CREATE_EXCL to mount(8) in
  util-linux this can be fixed:

      $ move-mount -f xfs --exclusive -o       source=/dev/sda4 /A
      $ move-mount -f xfs --exclusive -o noacl,source=/dev/sda4 /B
      Device or resource busy | move-mount.c: 300: do_fsconfig: i xfs: reusing existing filesystem not allowed

  This requires the new mount api. With the old mount api it would be
  necessary to plumb this through every legacy filesystem's
  file_system_type->mount() method. If they want this feature they are
  most welcome to switch to the new mount api"

Link: https://github.com/brauner/move-mount-beneath [1]
Link: https://lore.kernel.org/linux-block/20230704-fasching-wertarbeit-7c6ffb01c83d@brauner
Link: https://lore.kernel.org/linux-block/20230705-pumpwerk-vielversprechend-a4b1fd947b65@brauner
Link: https://lore.kernel.org/linux-fsdevel/20230725-einnahmen-warnschilder-17779aec0a97@brauner
Link: https://lore.kernel.org/lkml/20230824-anzog-allheilmittel-e8c63e429a79@brauner/

* tag 'v6.6-vfs.fs_context' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: add FSCONFIG_CMD_CREATE_EXCL
  fs: add vfs_cmd_reconfigure()
  fs: add vfs_cmd_create()
  super: remove get_tree_single_reconf()
parents 2dde18cd 22ed7ecd
......@@ -692,6 +692,7 @@ void vfs_clean_context(struct fs_context *fc)
security_free_mnt_opts(&fc->security);
kfree(fc->source);
fc->source = NULL;
fc->exclusive = false;
fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
fc->phase = FS_CONTEXT_AWAITING_RECONF;
......
......@@ -209,6 +209,72 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
return ret;
}
/**
 * vfs_cmd_create - handle the superblock creation commands of fsconfig()
 * @fc: filesystem context to create the superblock from
 * @exclusive: true if an already existing superblock must not be reused
 *
 * The context has to be in FS_CONTEXT_CREATE_PARAMS and the caller has to
 * pass mount_capable(). An exclusive request is refused for contexts that
 * are driven by legacy_fs_context_ops. None of these three early checks
 * changes @fc->phase.
 *
 * Return: 0 on success, with @fc moved to FS_CONTEXT_AWAITING_MOUNT.
 * -EBUSY, -EPERM or -EOPNOTSUPP if one of the early checks fails. Otherwise
 * the error reported by vfs_get_tree() or security_sb_kern_mount(), with
 * @fc moved to FS_CONTEXT_FAILED.
 */
static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
{
	struct super_block *sb;
	int err;

	if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
		return -EBUSY;

	if (!mount_capable(fc))
		return -EPERM;

	/* exclusive creation is only offered through the new mount api */
	if (exclusive && fc->ops == &legacy_fs_context_ops)
		return -EOPNOTSUPP;

	fc->phase = FS_CONTEXT_CREATING;
	/* record the request in @fc before the tree is obtained */
	fc->exclusive = exclusive;

	err = vfs_get_tree(fc);
	if (err)
		goto failed;

	sb = fc->root->d_sb;
	err = security_sb_kern_mount(sb);
	if (unlikely(err)) {
		/*
		 * NOTE(review): presumably this gives up @fc->root together
		 * with the still held @s_umount - confirm in fc_drop_locked().
		 */
		fc_drop_locked(fc);
		goto failed;
	}

	/* @s_umount was taken somewhere below vfs_get_tree(); release it */
	up_write(&sb->s_umount);
	fc->phase = FS_CONTEXT_AWAITING_MOUNT;
	return 0;

failed:
	fc->phase = FS_CONTEXT_FAILED;
	return err;
}
/**
 * vfs_cmd_reconfigure - handle FSCONFIG_CMD_RECONFIGURE
 * @fc: filesystem context describing the reconfiguration
 *
 * The context has to be in FS_CONTEXT_RECONF_PARAMS, otherwise -EBUSY is
 * returned and @fc->phase is left alone. The caller needs CAP_SYS_ADMIN in
 * the user namespace of the superblock behind @fc->root. reconfigure_super()
 * is called with that superblock's @s_umount held for writing.
 *
 * Return: 0 on success, after vfs_clean_context() has been run on @fc.
 * -EBUSY if @fc is in the wrong phase. Otherwise -EPERM or the error from
 * reconfigure_super(), with @fc moved to FS_CONTEXT_FAILED.
 */
static int vfs_cmd_reconfigure(struct fs_context *fc)
{
	struct super_block *sb;
	int err;

	if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
		return -EBUSY;

	fc->phase = FS_CONTEXT_RECONFIGURING;

	sb = fc->root->d_sb;
	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto failed;
	}

	/* hold @s_umount for writing around the actual reconfiguration */
	down_write(&sb->s_umount);
	err = reconfigure_super(fc);
	up_write(&sb->s_umount);
	if (err)
		goto failed;

	vfs_clean_context(fc);
	return 0;

failed:
	fc->phase = FS_CONTEXT_FAILED;
	return err;
}
/*
* Check the state and apply the configuration. Note that this function is
* allowed to 'steal' the value by setting param->xxx to NULL before returning.
......@@ -216,7 +282,6 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
struct fs_parameter *param)
{
struct super_block *sb;
int ret;
ret = finish_clean_context(fc);
......@@ -224,39 +289,11 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return ret;
switch (cmd) {
case FSCONFIG_CMD_CREATE:
if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
return -EBUSY;
if (!mount_capable(fc))
return -EPERM;
fc->phase = FS_CONTEXT_CREATING;
ret = vfs_get_tree(fc);
if (ret)
break;
sb = fc->root->d_sb;
ret = security_sb_kern_mount(sb);
if (unlikely(ret)) {
fc_drop_locked(fc);
break;
}
up_write(&sb->s_umount);
fc->phase = FS_CONTEXT_AWAITING_MOUNT;
return 0;
return vfs_cmd_create(fc, false);
case FSCONFIG_CMD_CREATE_EXCL:
return vfs_cmd_create(fc, true);
case FSCONFIG_CMD_RECONFIGURE:
if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
return -EBUSY;
fc->phase = FS_CONTEXT_RECONFIGURING;
sb = fc->root->d_sb;
if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
ret = -EPERM;
break;
}
down_write(&sb->s_umount);
ret = reconfigure_super(fc);
up_write(&sb->s_umount);
if (ret)
break;
vfs_clean_context(fc);
return 0;
return vfs_cmd_reconfigure(fc);
default:
if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
fc->phase != FS_CONTEXT_RECONF_PARAMS)
......@@ -264,8 +301,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return vfs_parse_fs_param(fc, param);
}
fc->phase = FS_CONTEXT_FAILED;
return ret;
}
/**
......@@ -353,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig,
return -EINVAL;
break;
case FSCONFIG_CMD_CREATE:
case FSCONFIG_CMD_CREATE_EXCL:
case FSCONFIG_CMD_RECONFIGURE:
if (_key || _value || aux)
return -EINVAL;
......
......@@ -546,17 +546,31 @@ bool mount_capable(struct fs_context *fc)
* @test: Comparison callback
* @set: Setup callback
*
* Find or create a superblock using the parameters stored in the filesystem
* context and the two callback functions.
* Create a new superblock or find an existing one.
*
* If an extant superblock is matched, then that will be returned with an
* elevated reference count that the caller must transfer or discard.
* The @test callback is used to find a matching existing superblock.
* Whether or not the requested parameters in @fc are taken into account
* is specific to the @test callback that is used. They may even be
* completely ignored.
*
* If an extant superblock is matched, it will be returned unless:
*
* (1) the namespace the filesystem context @fc and the extant
* superblock's namespace differ
*
* (2) the filesystem context @fc has requested that reusing an extant
* superblock is not allowed
*
* In both cases EBUSY will be returned.
*
* If no match is made, a new superblock will be allocated and basic
* initialisation will be performed (s_type, s_fs_info and s_id will be set and
* the set() callback will be invoked), the superblock will be published and it
* will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
* as yet unset.
* initialisation will be performed (s_type, s_fs_info and s_id will be
* set and the @set callback will be invoked), the superblock will be
* published and it will be returned in a partially constructed state
* with SB_BORN and SB_ACTIVE as yet unset.
*
* Return: On success, an extant or newly created superblock is
* returned. On failure an error pointer is returned.
*/
struct super_block *sget_fc(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
......@@ -603,9 +617,13 @@ struct super_block *sget_fc(struct fs_context *fc,
return s;
share_extant_sb:
if (user_ns != old->s_user_ns) {
if (user_ns != old->s_user_ns || fc->exclusive) {
spin_unlock(&sb_lock);
destroy_unused_super(s);
if (fc->exclusive)
warnfc(fc, "reusing existing filesystem not allowed");
else
warnfc(fc, "reusing existing filesystem in another namespace not allowed");
return ERR_PTR(-EBUSY);
}
if (!grab_super(old))
......@@ -1136,7 +1154,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
return 1;
}
static int vfs_get_super(struct fs_context *fc, bool reconf,
static int vfs_get_super(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
......@@ -1154,19 +1172,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf,
goto error;
sb->s_flags |= SB_ACTIVE;
fc->root = dget(sb->s_root);
} else {
fc->root = dget(sb->s_root);
if (reconf) {
err = reconfigure_super(fc);
if (err < 0) {
dput(fc->root);
fc->root = NULL;
goto error;
}
}
}
fc->root = dget(sb->s_root);
return 0;
error:
......@@ -1178,7 +1186,7 @@ int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
return vfs_get_super(fc, false, NULL, fill_super);
return vfs_get_super(fc, NULL, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);
......@@ -1186,25 +1194,17 @@ int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
return vfs_get_super(fc, false, test_single_super, fill_super);
return vfs_get_super(fc, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);
int get_tree_single_reconf(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
return vfs_get_super(fc, true, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single_reconf);
int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key)
{
fc->s_fs_info = key;
return vfs_get_super(fc, false, test_keyed_super, fill_super);
return vfs_get_super(fc, test_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);
......
......@@ -109,6 +109,7 @@ struct fs_context {
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
bool oldapi:1; /* Coming from mount(2) */
bool exclusive:1; /* create new superblock, reject existing one */
};
struct fs_context_operations {
......@@ -150,9 +151,6 @@ extern int get_tree_nodev(struct fs_context *fc,
extern int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
extern int get_tree_single_reconf(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
extern int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
......
......@@ -100,8 +100,9 @@ enum fsconfig_command {
FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment