Commit f608caba authored by Linus Torvalds

Merge tag 'vfs-6.11.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs mount query updates from Christian Brauner:
 "This contains work to extend the abilities of listmount() and
  statmount() and various fixes and cleanups.

  Features:

   - Allow iterating through mounts via listmount() from newest to
     oldest. This makes it possible for mount(8) to keep iterating the
     mount table in reverse order so it gets newest mounts first.

   - Relax permissions on listmount() and statmount().

     It's not necessary to have capabilities in the initial namespace:
     it is sufficient to have capabilities in the owning namespace of
     the mount namespace we're located in to list unreachable mounts in
     that namespace.

   - Extend both listmount() and statmount() to list and stat mounts in
     foreign mount namespaces.

     Currently the only way to iterate over mount entries in mount
     namespaces that aren't in the caller's mount namespace is by
     crawling through /proc in order to find /proc/<pid>/mountinfo for
     the relevant mount namespace.

     This is both very clumsy and hugely inefficient. So extend struct
     mnt_id_req with a new member that allows specifying the mount
     namespace id of the mount namespace we want to look at.

     Luckily internally we already have most of the infrastructure for
     this so we just need to expose it to userspace. Give userspace a
     way to retrieve the id of a mount namespace via statmount() and
     through a new nsfs ioctl() on mount namespace file descriptor.

     This comes with appropriate selftests.

   - Expose mount options through statmount().

     Currently if userspace wants to get mount options for a mount
     with statmount(), they still have to open /proc/<pid>/mountinfo to
     parse mount options. Simply expose the information through
     statmount() directly.

     Afterwards it's possible to rely only on statmount() and
     listmount() to retrieve all the information that
     /proc/<pid>/mountinfo provides, and more.

     This comes with appropriate selftests.

  Fixes:

   - Avoid copying to userspace under the namespace semaphore in
     listmount.

  Cleanups:

   - Simplify the error handling in listmount by relying on our newly
     added cleanup infrastructure.

   - Refuse invalid mount ids early for both listmount and statmount"

* tag 'vfs-6.11.mount' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: reject invalid last mount id early
  fs: refuse mnt id requests with invalid ids early
  fs: find rootfs mount of the mount namespace
  fs: only copy to userspace on success in listmount()
  sefltests: extend the statmount test for mount options
  fs: use guard for namespace_sem in statmount()
  fs: export mount options via statmount()
  fs: rename show_mnt_opts -> show_vfsmnt_opts
  selftests: add a test for the foreign mnt ns extensions
  fs: add an ioctl to get the mnt ns id from nsfs
  fs: Allow statmount() in foreign mount namespace
  fs: Allow listmount() in foreign mount namespace
  fs: export the mount ns id via statmount
  fs: keep an index of current mount namespaces
  fs: relax permissions for statmount()
  listmount: allow listing in reverse order
  fs: relax permissions for listmount()
  fs: simplify error handling
  fs: don't copy to userspace under namespace semaphore
  path: add cleanup helper
parents 2aae1d67 4bed843b
...@@ -16,6 +16,8 @@ struct mnt_namespace { ...@@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event; u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts; unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout; } __randomize_layout;
struct mnt_pcp { struct mnt_pcp {
......
...@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida); ...@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida); static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */ /* Don't allow confusion with old 32bit mount ID */
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32); #define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init; static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init; static struct hlist_head *mountpoint_hashtable __ro_after_init;
...@@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init; ...@@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem); static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr { struct mount_kattr {
unsigned int attr_set; unsigned int attr_set;
...@@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj); ...@@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/ */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
/*
 * Three-way comparison of @seq against the sequence number stored in @ns.
 *
 * Returns -1 when @seq is lower than ns->seq, 1 when it is higher and 0 when
 * both are equal.
 */
static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
{
	const u64 ns_seq = ns->seq;

	if (seq == ns_seq)
		return 0;
	return seq < ns_seq ? -1 : 1;
}
/*
 * Translate an rbtree node embedded as ->mnt_ns_tree_node back into the
 * mnt_namespace that contains it. A NULL @node yields NULL.
 */
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
	return node ? rb_entry(node, struct mnt_namespace, mnt_ns_tree_node) : NULL;
}
static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
{
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
u64 seq_a = ns_a->seq;
return mnt_ns_cmp(seq_a, ns_b) < 0;
}
static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
guard(write_lock)(&mnt_ns_tree_lock);
rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
}
/*
 * Drop one passive reference on @ns. Once the passive count reaches zero the
 * reference on the owning user namespace is put and @ns itself is freed.
 *
 * Must not be called with mnt_ns_tree_lock held.
 */
static void mnt_ns_release(struct mnt_namespace *ns)
{
	lockdep_assert_not_held(&mnt_ns_tree_lock);

	/* keep alive for {list,stat}mount() */
	if (!refcount_dec_and_test(&ns->passive))
		return;

	put_user_ns(ns->user_ns);
	kfree(ns);
}
/* Lets callers declare "ns __free(mnt_ns_release)"; a NULL pointer is skipped. */
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
if (!is_anon_ns(ns)) {
guard(write_lock)(&mnt_ns_tree_lock);
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
}
mnt_ns_release(ns);
}
/*
 * Returns the mount namespace whose id equals @mnt_ns_id or, when there is no
 * exact match, the namespace with the smallest id greater than @mnt_ns_id.
 * Returns NULL when no namespace in the tree has an id >= @mnt_ns_id.
 *
 * The caller must hold mnt_ns_tree_lock.
 */
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
{
	struct mnt_namespace *best = NULL;
	struct rb_node *pos;

	lockdep_assert_held(&mnt_ns_tree_lock);

	pos = mnt_ns_tree.rb_node;
	while (pos) {
		struct mnt_namespace *cur = node_to_mnt_ns(pos);

		if (cur->seq < mnt_ns_id) {
			/* Too small: any candidate sits in the right subtree. */
			pos = pos->rb_right;
			continue;
		}

		/* cur->seq >= mnt_ns_id: remember it and look for a closer one. */
		best = cur;
		if (cur->seq == mnt_ns_id)
			break;
		pos = pos->rb_left;
	}
	return best;
}
/*
 * Find the mount namespace with id @mnt_ns_id and take a passive reference on
 * it. Returns NULL when no namespace with exactly that id is in the tree.
 *
 * A passive reference does not keep the mounts alive: the namespace can still
 * be emptied, e.g. when the last task holding an active reference exits. To
 * look at the mounts of the namespace @namespace_sem must be acquired first.
 * If the namespace was shut down before @namespace_sem was taken,
 * {list,stat}mount() will find the mount rbtree of the namespace empty.
 */
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
	struct mnt_namespace *found;

	guard(read_lock)(&mnt_ns_tree_lock);

	found = mnt_ns_find_id_at(mnt_ns_id);
	if (found && found->seq == mnt_ns_id) {
		refcount_inc(&found->passive);
		return found;
	}
	return NULL;
}
static inline void lock_mount_hash(void) static inline void lock_mount_hash(void)
{ {
write_seqlock(&mount_lock); write_seqlock(&mount_lock);
...@@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id) ...@@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id)
return ret; return ret;
} }
/*
 * Returns the mount in @ns whose unique id equals @mnt_id or, when there is
 * no exact match, the mount with the largest unique id smaller than @mnt_id.
 * Returns NULL when every mount in @ns has a unique id above @mnt_id.
 */
static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id)
{
	struct rb_node *pos = ns->mounts.rb_node;
	struct mount *best = NULL;

	while (pos) {
		struct mount *cur = node_to_mount(pos);

		if (cur->mnt_id_unique > mnt_id) {
			/* Too large: any candidate sits in the left subtree. */
			pos = pos->rb_left;
			continue;
		}

		/* cur->mnt_id_unique <= mnt_id: remember it, try to get closer. */
		best = cur;
		if (cur->mnt_id_unique == mnt_id)
			break;
		pos = pos->rb_right;
	}
	return best;
}
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */ /* iterator; we want it to have access to namespace_sem, thus here... */
...@@ -3699,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns) ...@@ -3699,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns)) if (!is_anon_ns(ns))
ns_free_inum(&ns->ns); ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts); dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns); mnt_ns_tree_remove(ns);
kfree(ns);
} }
/* /*
...@@ -3739,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a ...@@ -3739,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon) if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1); refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT; new_ns->mounts = RB_ROOT;
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll); init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns); new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts; new_ns->ucounts = ucounts;
...@@ -3816,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, ...@@ -3816,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root) while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old); p = next_mnt(skip_mnt_tree(p), old);
} }
mnt_ns_tree_add(new_ns);
namespace_unlock(); namespace_unlock();
if (rootmnt) if (rootmnt)
...@@ -4833,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq) ...@@ -4833,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0; return 0;
} }
/*
 * Report the id (sequence number) of mount namespace @ns in the statmount
 * result and flag the field as filled in through STATMOUNT_MNT_NS_ID.
 */
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
	s->sm.mnt_ns_id = ns->seq;
	s->sm.mask |= STATMOUNT_MNT_NS_ID;
}
/*
 * Append the superblock's mount options for s->mnt to @seq by calling the
 * filesystem's ->show_options() method, if it has one.
 *
 * Returns 0 on success (including when nothing was emitted), the error
 * returned by ->show_options(), or -EAGAIN when @seq overflowed.
 */
static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
{
	struct vfsmount *mnt = s->mnt;
	struct super_block *sb = mnt->mnt_sb;
	size_t start, len;
	int err;

	if (!sb->s_op->show_options)
		return 0;

	start = seq->count;
	err = sb->s_op->show_options(seq, mnt->mnt_root);
	if (err)
		return err;

	if (unlikely(seq_has_overflowed(seq)))
		return -EAGAIN;

	len = seq->count - start;
	if (!len)
		return 0;

	/*
	 * skip leading comma: the first emitted byte is dropped by shifting
	 * the rest of the output down by one.
	 * NOTE(review): assumes ->show_options() output always starts with
	 * ',' - confirm.
	 */
	memmove(seq->buf + start, seq->buf + start + 1, len - 1);
	seq->count--;
	return 0;
}
static int statmount_string(struct kstatmount *s, u64 flag) static int statmount_string(struct kstatmount *s, u64 flag)
{ {
int ret; int ret;
...@@ -4853,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag) ...@@ -4853,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag)
sm->mnt_point = seq->count; sm->mnt_point = seq->count;
ret = statmount_mnt_point(s, seq); ret = statmount_mnt_point(s, seq);
break; break;
case STATMOUNT_MNT_OPTS:
sm->mnt_opts = seq->count;
ret = statmount_mnt_opts(s, seq);
break;
default: default:
WARN_ON_ONCE(true); WARN_ON_ONCE(true);
return -EINVAL; return -EINVAL;
...@@ -4893,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s) ...@@ -4893,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s)
return 0; return 0;
} }
static int do_statmount(struct kstatmount *s) static struct mount *listmnt_next(struct mount *curr, bool reverse)
{ {
struct mount *m = real_mount(s->mnt); struct rb_node *node;
if (reverse)
node = rb_prev(&curr->mnt_node);
else
node = rb_next(&curr->mnt_node);
return node_to_mount(node);
}
/*
 * Fill @root with the path that serves as the root for queries against @ns.
 *
 * For the caller's own mount namespace this is the caller's fs root. For any
 * other namespace it is the first mount, in forward rbtree order after
 * ns->root, whose parent is ns->root. On success @root holds references
 * obtained via get_fs_root() or mntget()/dget().
 *
 * Returns 0 on success or -ENOENT when @ns has no mounts or no such child.
 * The caller must hold namespace_sem.
 */
static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
{
	struct mount *first, *child;

	rwsem_assert_held(&namespace_sem);

	/* We're looking at our own ns, just use get_fs_root. */
	if (ns == current->nsproxy->mnt_ns) {
		get_fs_root(current->fs, root);
		return 0;
	}

	/*
	 * We have to find the first mount in our ns and use that, however it
	 * may not exist, so handle that properly.
	 */
	if (RB_EMPTY_ROOT(&ns->mounts))
		return -ENOENT;

	first = ns->root;
	child = first;
	do {
		child = listmnt_next(child, false);
		if (!child)
			return -ENOENT;
	} while (child->mnt_parent != first);

	root->mnt = mntget(&child->mnt);
	root->dentry = dget(root->mnt->mnt_root);
	return 0;
}
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
struct path root __free(path_put) = {};
struct mount *m;
int err; int err;
/* Has the namespace already been emptied? */
if (mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts))
return -ENOENT;
s->mnt = lookup_mnt_in_ns(mnt_id, ns);
if (!s->mnt)
return -ENOENT;
err = grab_requested_root(ns, &root);
if (err)
return err;
/* /*
* Don't trigger audit denials. We just want to determine what * Don't trigger audit denials. We just want to determine what
* mounts to show users. * mounts to show users.
*/ */
if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) && m = real_mount(s->mnt);
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root); err = security_sb_statfs(s->mnt->mnt_root);
if (err) if (err)
return err; return err;
s->root = root;
if (s->mask & STATMOUNT_SB_BASIC) if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s); statmount_sb_basic(s);
...@@ -4928,6 +5159,12 @@ static int do_statmount(struct kstatmount *s) ...@@ -4928,6 +5159,12 @@ static int do_statmount(struct kstatmount *s)
if (!err && s->mask & STATMOUNT_MNT_POINT) if (!err && s->mask & STATMOUNT_MNT_POINT)
err = statmount_string(s, STATMOUNT_MNT_POINT); err = statmount_string(s, STATMOUNT_MNT_POINT);
if (!err && s->mask & STATMOUNT_MNT_OPTS)
err = statmount_string(s, STATMOUNT_MNT_OPTS);
if (!err && s->mask & STATMOUNT_MNT_NS_ID)
statmount_mnt_ns_id(s, ns);
if (err) if (err)
return err; return err;
...@@ -4945,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) ...@@ -4945,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
return true; return true;
} }
#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS)
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize, struct statmount __user *buf, size_t bufsize,
size_t seq_size) size_t seq_size)
...@@ -4956,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, ...@@ -4956,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
ks->mask = kreq->param; ks->mask = kreq->param;
ks->buf = buf; ks->buf = buf;
ks->bufsize = bufsize; ks->bufsize = bufsize;
ks->seq.size = seq_size;
ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT); if (ks->mask & STATMOUNT_STRING_REQ) {
if (!ks->seq.buf) if (bufsize == sizeof(ks->sm))
return -ENOMEM; return -EOVERFLOW;
ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
if (!ks->seq.buf)
return -ENOMEM;
ks->seq.size = seq_size;
}
return 0; return 0;
} }
...@@ -4969,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, ...@@ -4969,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
int ret; int ret;
size_t usize; size_t usize;
BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0); BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1);
ret = get_user(usize, &req->size); ret = get_user(usize, &req->size);
if (ret) if (ret)
...@@ -4984,16 +5232,32 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, ...@@ -4984,16 +5232,32 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
return ret; return ret;
if (kreq->spare != 0) if (kreq->spare != 0)
return -EINVAL; return -EINVAL;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
return -EINVAL;
return 0; return 0;
} }
/*
 * Resolve the mount namespace a request refers to and return it with a
 * passive reference held.
 *
 * A non-zero @mnt_ns_id is looked up via lookup_mnt_ns(), which may return
 * NULL. A zero @mnt_ns_id selects the caller's own mount namespace.
 */
static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id)
{
	struct mnt_namespace *own;

	if (mnt_ns_id)
		return lookup_mnt_ns(mnt_ns_id);

	own = current->nsproxy->mnt_ns;
	refcount_inc(&own->passive);
	return own;
}
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
struct statmount __user *, buf, size_t, bufsize, struct statmount __user *, buf, size_t, bufsize,
unsigned int, flags) unsigned int, flags)
{ {
struct vfsmount *mnt; struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct kstatmount *ks __free(kfree) = NULL;
struct mnt_id_req kreq; struct mnt_id_req kreq;
struct kstatmount ks;
/* We currently support retrieval of 3 strings. */ /* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX; size_t seq_size = 3 * PATH_MAX;
int ret; int ret;
...@@ -5005,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, ...@@ -5005,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret) if (ret)
return ret; return ret;
ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
if (!ns)
return -ENOENT;
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -ENOENT;
ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
if (!ks)
return -ENOMEM;
retry: retry:
ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size); ret = prepare_kstatmount(ks, &kreq, buf, bufsize, seq_size);
if (ret) if (ret)
return ret; return ret;
down_read(&namespace_sem); scoped_guard(rwsem_read, &namespace_sem)
mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns); ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
if (!mnt) {
up_read(&namespace_sem);
kvfree(ks.seq.buf);
return -ENOENT;
}
ks.mnt = mnt;
get_fs_root(current->fs, &ks.root);
ret = do_statmount(&ks);
path_put(&ks.root);
up_read(&namespace_sem);
if (!ret) if (!ret)
ret = copy_statmount_to_user(&ks); ret = copy_statmount_to_user(ks);
kvfree(ks.seq.buf); kvfree(ks->seq.buf);
if (retry_statmount(ret, &seq_size)) if (retry_statmount(ret, &seq_size))
goto retry; goto retry;
return ret; return ret;
} }
static struct mount *listmnt_next(struct mount *curr) static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
bool reverse)
{ {
return node_to_mount(rb_next(&curr->mnt_node)); struct path root __free(path_put) = {};
} struct path orig;
struct mount *r, *first;
static ssize_t do_listmount(struct mount *first, struct path *orig,
u64 mnt_parent_id, u64 __user *mnt_ids,
size_t nr_mnt_ids, const struct path *root)
{
struct mount *r;
ssize_t ret; ssize_t ret;
rwsem_assert_held(&namespace_sem);
ret = grab_requested_root(ns, &root);
if (ret)
return ret;
if (mnt_parent_id == LSMT_ROOT) {
orig = root;
} else {
orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
if (!orig.mnt)
return -ENOENT;
orig.dentry = orig.mnt->mnt_root;
}
/* /*
* Don't trigger audit denials. We just want to determine what * Don't trigger audit denials. We just want to determine what
* mounts to show users. * mounts to show users.
*/ */
if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) && if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
ret = security_sb_statfs(orig->dentry); ret = security_sb_statfs(orig.dentry);
if (ret) if (ret)
return ret; return ret;
for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) { if (!last_mnt_id) {
if (reverse)
first = node_to_mount(rb_last(&ns->mounts));
else
first = node_to_mount(rb_first(&ns->mounts));
} else {
if (reverse)
first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);
else
first = mnt_find_id_at(ns, last_mnt_id + 1);
}
for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r, reverse)) {
if (r->mnt_id_unique == mnt_parent_id) if (r->mnt_id_unique == mnt_parent_id)
continue; continue;
if (!is_path_reachable(r, r->mnt.mnt_root, orig)) if (!is_path_reachable(r, r->mnt.mnt_root, &orig))
continue; continue;
if (put_user(r->mnt_id_unique, mnt_ids)) *mnt_ids = r->mnt_id_unique;
return -EFAULT;
mnt_ids++; mnt_ids++;
nr_mnt_ids--; nr_mnt_ids--;
ret++; ret++;
...@@ -5070,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig, ...@@ -5070,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig,
return ret; return ret;
} }
SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
mnt_ids, size_t, nr_mnt_ids, unsigned int, flags) u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{ {
struct mnt_namespace *ns = current->nsproxy->mnt_ns; u64 *kmnt_ids __free(kvfree) = NULL;
const size_t maxcount = 1000000;
struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq; struct mnt_id_req kreq;
struct mount *first; u64 last_mnt_id;
struct path root, orig;
u64 mnt_parent_id, last_mnt_id;
const size_t maxcount = (size_t)-1 >> 3;
ssize_t ret; ssize_t ret;
if (flags) if (flags & ~LISTMOUNT_REVERSE)
return -EINVAL; return -EINVAL;
/*
* If the mount namespace really has more than 1 million mounts the
* caller must iterate over the mount namespace (and reconsider their
* system design...).
*/
if (unlikely(nr_mnt_ids > maxcount)) if (unlikely(nr_mnt_ids > maxcount))
return -EFAULT; return -EOVERFLOW;
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids))) if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT; return -EFAULT;
...@@ -5093,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *, ...@@ -5093,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
ret = copy_mnt_id_req(req, &kreq); ret = copy_mnt_id_req(req, &kreq);
if (ret) if (ret)
return ret; return ret;
mnt_parent_id = kreq.mnt_id;
last_mnt_id = kreq.param; last_mnt_id = kreq.param;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
return -EINVAL;
down_read(&namespace_sem); kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
get_fs_root(current->fs, &root); GFP_KERNEL_ACCOUNT);
if (mnt_parent_id == LSMT_ROOT) { if (!kmnt_ids)
orig = root; return -ENOMEM;
} else {
ret = -ENOENT; ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns); if (!ns)
if (!orig.mnt) return -ENOENT;
goto err;
orig.dentry = orig.mnt->mnt_root; if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
} !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
if (!last_mnt_id) return -ENOENT;
first = node_to_mount(rb_first(&ns->mounts));
else scoped_guard(rwsem_read, &namespace_sem)
first = mnt_find_id_at(ns, last_mnt_id + 1); ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
if (ret <= 0)
return ret;
if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
return -EFAULT;
ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root);
err:
path_put(&root);
up_read(&namespace_sem);
return ret; return ret;
} }
static void __init init_mount_tree(void) static void __init init_mount_tree(void)
{ {
struct vfsmount *mnt; struct vfsmount *mnt;
...@@ -5147,6 +5443,8 @@ static void __init init_mount_tree(void) ...@@ -5147,6 +5443,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root); set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root); set_fs_root(current->fs, &root);
mnt_ns_tree_add(ns);
} }
void __init mnt_init(void) void __init mnt_init(void)
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/nsfs.h> #include <linux/nsfs.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "mount.h"
#include "internal.h" #include "internal.h"
static struct vfsmount *nsfs_mnt; static struct vfsmount *nsfs_mnt;
...@@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, ...@@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
argp = (uid_t __user *) arg; argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner); uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp); return put_user(uid, argp);
case NS_GET_MNTNS_ID: {
struct mnt_namespace *mnt_ns;
__u64 __user *idp;
__u64 id;
if (ns->ops->type != CLONE_NEWNS)
return -EINVAL;
mnt_ns = container_of(ns, struct mnt_namespace, ns);
idp = (__u64 __user *)arg;
id = mnt_ns->seq;
return put_user(id, idp);
}
default: default:
return -ENOTTY; return -ENOTTY;
} }
......
...@@ -61,7 +61,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) ...@@ -61,7 +61,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
return security_sb_show_options(m, sb); return security_sb_show_options(m, sb);
} }
static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ {
static const struct proc_fs_opts mnt_opts[] = { static const struct proc_fs_opts mnt_opts[] = {
{ MNT_NOSUID, ",nosuid" }, { MNT_NOSUID, ",nosuid" },
...@@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) ...@@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
err = show_sb_opts(m, sb); err = show_sb_opts(m, sb);
if (err) if (err)
goto out; goto out;
show_mnt_opts(m, mnt); show_vfsmnt_opts(m, mnt);
if (sb->s_op->show_options) if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt_path.dentry); err = sb->s_op->show_options(m, mnt_path.dentry);
seq_puts(m, " 0 0\n"); seq_puts(m, " 0 0\n");
...@@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) ...@@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
goto out; goto out;
seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
show_mnt_opts(m, mnt); show_vfsmnt_opts(m, mnt);
/* Tagged fields ("foo:X" or "bar") */ /* Tagged fields ("foo:X" or "bar") */
if (IS_MNT_SHARED(r)) if (IS_MNT_SHARED(r))
......
...@@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path) ...@@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path)
*path = (struct path) { }; *path = (struct path) { };
} }
/*
* Cleanup macro for use with __free(path_put). Avoids dereference and
* copying @path unlike DEFINE_FREE(). path_put() will handle the empty
* path correctly just ensure @path is initialized:
*
* struct path path __free(path_put) = {};
*/
#define __free_path_put path_put
#endif /* _LINUX_PATH_H */ #endif /* _LINUX_PATH_H */
...@@ -154,7 +154,7 @@ struct mount_attr { ...@@ -154,7 +154,7 @@ struct mount_attr {
*/ */
struct statmount { struct statmount {
__u32 size; /* Total size, including strings */ __u32 size; /* Total size, including strings */
__u32 __spare1; __u32 mnt_opts; /* [str] Mount options of the mount */
__u64 mask; /* What results were written */ __u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */ __u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor; __u32 sb_dev_minor;
...@@ -172,7 +172,8 @@ struct statmount { ...@@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */ __u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */ __u32 mnt_point; /* [str] Mountpoint relative to current root */
__u64 __spare2[50]; __u64 mnt_ns_id; /* ID of the mount namespace */
__u64 __spare2[49];
char str[]; /* Variable size part containing strings */ char str[]; /* Variable size part containing strings */
}; };
...@@ -188,10 +189,12 @@ struct mnt_id_req { ...@@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare; __u32 spare;
__u64 mnt_id; __u64 mnt_id;
__u64 param; __u64 param;
__u64 mnt_ns_id;
}; };
/* List of all mnt_id_req versions. */ /* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/* /*
* @mask bits for statmount(2) * @mask bits for statmount(2)
...@@ -202,10 +205,13 @@ struct mnt_id_req { ...@@ -202,10 +205,13 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
/* /*
* Special @mnt_id values that can be passed to listmount * Special @mnt_id values that can be passed to listmount
*/ */
#define LSMT_ROOT 0xffffffffffffffff /* root mount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */
#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */ #endif /* _UAPI_LINUX_MOUNT_H */
...@@ -15,5 +15,7 @@ ...@@ -15,5 +15,7 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3) #define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */ /* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4) #define NS_GET_OWNER_UID _IO(NSIO, 0x4)
/* Get the id for a mount namespace */
#define NS_GET_MNTNS_ID _IO(NSIO, 0x5)
#endif /* __LINUX_NSFS_H */ #endif /* __LINUX_NSFS_H */
# SPDX-License-Identifier: GPL-2.0-or-later # SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
TEST_GEN_PROGS := statmount_test TEST_GEN_PROGS := statmount_test statmount_test_ns
include ../../lib.mk include ../../lib.mk
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __STATMOUNT_H
#define __STATMOUNT_H
#include <stdint.h>
#include <linux/mount.h>
#include <asm/unistd.h>
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
if (mnt_ns_id) {
req.size = MNT_ID_REQ_SIZE_VER1;
req.mnt_ns_id = mnt_ns_id;
}
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
/*
 * listmount() - thin wrapper around the listmount system call.
 * @mnt_id:      id of the mount whose children are listed (LSMT_ROOT for the
 *               root mount)
 * @mnt_ns_id:   id of the mount namespace to look in; 0 selects the shorter
 *               VER0 request, which carries no namespace id
 * @last_mnt_id: passed in the request's param field; callers pass 0 to start
 *               from the beginning or the last id of the previous batch
 * @list:        array the kernel fills with mount ids
 * @num:         number of entries available in @list
 * @flags:       flags argument forwarded unchanged
 *
 * Returns whatever syscall() returns; callers treat -1 as failure (errno set)
 * and any other value as the number of ids stored in @list.
 *
 * Defined "static inline", like statmount() in this header, so that a
 * translation unit that includes the header without calling listmount() does
 * not trip -Wunused-function.
 */
static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
				uint64_t last_mnt_id, uint64_t list[],
				size_t num, unsigned int flags)
{
	struct mnt_id_req req = {
		.size = MNT_ID_REQ_SIZE_VER0,
		.mnt_id = mnt_id,
		.param = last_mnt_id,
	};

	/* Only the VER1 layout has room for a namespace id. */
	if (mnt_ns_id) {
		req.size = MNT_ID_REQ_SIZE_VER1;
		req.mnt_ns_id = mnt_ns_id;
	}

	return syscall(__NR_listmount, &req, list, num, flags);
}
#endif /* __STATMOUNT_H */
...@@ -4,17 +4,15 @@ ...@@ -4,17 +4,15 @@
#include <assert.h> #include <assert.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h>
#include <sched.h> #include <sched.h>
#include <fcntl.h> #include <fcntl.h>
#include <sys/param.h> #include <sys/param.h>
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/statfs.h> #include <sys/statfs.h>
#include <linux/mount.h>
#include <linux/stat.h> #include <linux/stat.h>
#include <asm/unistd.h>
#include "statmount.h"
#include "../../kselftest.h" #include "../../kselftest.h"
static const char *const known_fs[] = { static const char *const known_fs[] = {
...@@ -36,18 +34,6 @@ static const char *const known_fs[] = { ...@@ -36,18 +34,6 @@ static const char *const known_fs[] = {
"ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
"zonefs", NULL }; "zonefs", NULL };
static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
size_t bufsize, unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = mask,
};
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{ {
size_t bufsize = 1 << 15; size_t bufsize = 1 << 15;
...@@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne ...@@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne
int ret; int ret;
for (;;) { for (;;) {
ret = statmount(mnt_id, mask, tmp, bufsize, flags); ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (ret != -1) if (ret != -1)
break; break;
if (tofree) if (tofree)
...@@ -121,7 +107,7 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; ...@@ -121,7 +107,7 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
static int orig_root; static int orig_root;
static uint64_t root_id, parent_id; static uint64_t root_id, parent_id;
static uint32_t old_root_id, old_parent_id; static uint32_t old_root_id, old_parent_id;
static FILE *f_mountinfo;
static void cleanup_namespace(void) static void cleanup_namespace(void)
{ {
...@@ -146,7 +132,7 @@ static void setup_namespace(void) ...@@ -146,7 +132,7 @@ static void setup_namespace(void)
uid_t uid = getuid(); uid_t uid = getuid();
gid_t gid = getgid(); gid_t gid = getgid();
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1) if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n", ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno)); strerror(errno));
...@@ -157,6 +143,11 @@ static void setup_namespace(void) ...@@ -157,6 +143,11 @@ static void setup_namespace(void)
sprintf(buf, "0 %d 1", gid); sprintf(buf, "0 %d 1", gid);
write_file("/proc/self/gid_map", buf); write_file("/proc/self/gid_map", buf);
f_mountinfo = fopen("/proc/self/mountinfo", "re");
if (!f_mountinfo)
ksft_exit_fail_msg("failed to open mountinfo: %s\n",
strerror(errno));
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret == -1) if (ret == -1)
ksft_exit_fail_msg("making mount tree private: %s\n", ksft_exit_fail_msg("making mount tree private: %s\n",
...@@ -216,25 +207,13 @@ static int setup_mount_tree(int log2_num) ...@@ -216,25 +207,13 @@ static int setup_mount_tree(int log2_num)
return 0; return 0;
} }
static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
uint64_t list[], size_t num, unsigned int flags)
{
struct mnt_id_req req = {
.size = MNT_ID_REQ_SIZE_VER0,
.mnt_id = mnt_id,
.param = last_mnt_id,
};
return syscall(__NR_listmount, &req, list, num, flags);
}
static void test_listmount_empty_root(void) static void test_listmount_empty_root(void)
{ {
ssize_t res; ssize_t res;
const unsigned int size = 32; const unsigned int size = 32;
uint64_t list[size]; uint64_t list[size];
res = listmount(LSMT_ROOT, 0, list, size, 0); res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) { if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno)); ksft_test_result_fail("listmount: %s\n", strerror(errno));
return; return;
...@@ -259,7 +238,7 @@ static void test_statmount_zero_mask(void) ...@@ -259,7 +238,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm; struct statmount sm;
int ret; int ret;
ret = statmount(root_id, 0, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n", ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno)); strerror(errno));
...@@ -285,7 +264,7 @@ static void test_statmount_mnt_basic(void) ...@@ -285,7 +264,7 @@ static void test_statmount_mnt_basic(void)
int ret; int ret;
uint64_t mask = STATMOUNT_MNT_BASIC; uint64_t mask = STATMOUNT_MNT_BASIC;
ret = statmount(root_id, mask, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n", ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno)); strerror(errno));
...@@ -345,7 +324,7 @@ static void test_statmount_sb_basic(void) ...@@ -345,7 +324,7 @@ static void test_statmount_sb_basic(void)
struct statx sx; struct statx sx;
struct statfs sf; struct statfs sf;
ret = statmount(root_id, mask, &sm, sizeof(sm), 0); ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n", ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno)); strerror(errno));
...@@ -470,6 +449,88 @@ static void test_statmount_fs_type(void) ...@@ -470,6 +449,88 @@ static void test_statmount_fs_type(void)
free(sm); free(sm);
} }
/*
 * Check that the mount options statmount() reports for the root mount match
 * the trailing options field of the same mount's line in the mountinfo stream
 * held in f_mountinfo, once the generic ro/rw, sync, dirsync and lazytime
 * entries have been stripped from the front of that field.
 *
 * Exactly one kselftest result is reported on every path.
 */
static void test_statmount_mnt_opts(void)
{
	struct statmount *sm;
	const char *statmount_opts;
	char *line = NULL;
	size_t len = 0;

	sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
			     0);
	if (!sm) {
		ksft_test_result_fail("statmount mnt opts: %s\n",
				      strerror(errno));
		return;
	}

	while (getline(&line, &len, f_mountinfo) != -1) {
		int i;
		char *p, *p2;
		unsigned int old_mnt_id;

		/* The line starts with the (old style) mount id. */
		old_mnt_id = atoi(line);
		if (old_mnt_id != sm->mnt_id_old)
			continue;

		/* Step over the first five space-separated fields. */
		for (p = line, i = 0; p && i < 5; i++)
			p = strchr(p + 1, ' ');
		if (!p)
			continue;

		/* Terminate the sixth field and look for the '-' separator. */
		p2 = strchr(p + 1, ' ');
		if (!p2)
			continue;
		*p2 = '\0';
		p = strchr(p2 + 1, '-');
		if (!p)
			continue;

		/*
		 * Two more fields follow the separator; p then sits at the
		 * start of the last field (presumably the super options, per
		 * the mountinfo format).
		 */
		for (p++, i = 0; p && i < 2; i++)
			p = strchr(p + 1, ' ');
		if (!p)
			continue;
		p++;

		/* skip generic superblock options */
		if (strncmp(p, "ro", 2) == 0)
			p += 2;
		else if (strncmp(p, "rw", 2) == 0)
			p += 2;
		if (*p == ',')
			p++;
		if (strncmp(p, "sync", 4) == 0)
			p += 4;
		if (*p == ',')
			p++;
		if (strncmp(p, "dirsync", 7) == 0)
			p += 7;
		if (*p == ',')
			p++;
		if (strncmp(p, "lazytime", 8) == 0)
			p += 8;
		if (*p == ',')
			p++;

		/* getline() keeps the newline; drop it before comparing. */
		p2 = strrchr(p, '\n');
		if (p2)
			*p2 = '\0';

		/*
		 * A field is only valid when its "got" bit is set in the
		 * returned mask.  Without it, treat the options as empty
		 * instead of following an offset the kernel did not fill in.
		 */
		if (sm->mask & STATMOUNT_MNT_OPTS)
			statmount_opts = sm->str + sm->mnt_opts;
		else
			statmount_opts = "";

		if (strcmp(statmount_opts, p) != 0)
			ksft_test_result_fail(
				"unexpected mount options: '%s' != '%s'\n",
				statmount_opts, p);
		else
			ksft_test_result_pass("statmount mount options\n");
		free(sm);
		free(line);
		return;
	}

	ksft_test_result_fail("didn't find mount entry\n");
	free(sm);
	free(line);
}
static void test_statmount_string(uint64_t mask, size_t off, const char *name) static void test_statmount_string(uint64_t mask, size_t off, const char *name)
{ {
struct statmount *sm; struct statmount *sm;
...@@ -506,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) ...@@ -506,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size; exactsize = sm->size;
shortsize = sizeof(*sm) + i; shortsize = sizeof(*sm) + i;
ret = statmount(root_id, mask, sm, exactsize, 0); ret = statmount(root_id, 0, mask, sm, exactsize, 0);
if (ret == -1) { if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n", ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno)); strerror(errno));
goto out; goto out;
} }
errno = 0; errno = 0;
ret = statmount(root_id, mask, sm, shortsize, 0); ret = statmount(root_id, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) { if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno)); strerror(errno));
...@@ -541,7 +602,7 @@ static void test_listmount_tree(void) ...@@ -541,7 +602,7 @@ static void test_listmount_tree(void)
if (res == -1) if (res == -1)
return; return;
num = res = listmount(LSMT_ROOT, 0, list, size, 0); num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) { if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno)); ksft_test_result_fail("listmount: %s\n", strerror(errno));
return; return;
...@@ -553,7 +614,7 @@ static void test_listmount_tree(void) ...@@ -553,7 +614,7 @@ static void test_listmount_tree(void)
} }
for (i = 0; i < size - step;) { for (i = 0; i < size - step;) {
res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
if (res == -1) if (res == -1)
ksft_test_result_fail("short listmount: %s\n", ksft_test_result_fail("short listmount: %s\n",
strerror(errno)); strerror(errno));
...@@ -585,18 +646,18 @@ int main(void) ...@@ -585,18 +646,18 @@ int main(void)
int ret; int ret;
uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
ksft_print_header(); ksft_print_header();
ret = statmount(0, 0, NULL, 0, 0); ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1); assert(ret == -1);
if (errno == ENOSYS) if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n"); ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace(); setup_namespace();
ksft_set_plan(14); ksft_set_plan(15);
test_listmount_empty_root(); test_listmount_empty_root();
test_statmount_zero_mask(); test_statmount_zero_mask();
test_statmount_mnt_basic(); test_statmount_mnt_basic();
...@@ -604,6 +665,7 @@ int main(void) ...@@ -604,6 +665,7 @@ int main(void)
test_statmount_mnt_root(); test_statmount_mnt_root();
test_statmount_mnt_point(); test_statmount_mnt_point();
test_statmount_fs_type(); test_statmount_fs_type();
test_statmount_mnt_opts();
test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
......
// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <linux/nsfs.h>
#include <linux/stat.h>
#include "statmount.h"
#include "../../kselftest.h"
#define NSID_PASS 0
#define NSID_FAIL 1
#define NSID_SKIP 2
#define NSID_ERROR 3
static void handle_result(int ret, const char *testname)
{
if (ret == NSID_PASS)
ksft_test_result_pass("%s\n", testname);
else if (ret == NSID_FAIL)
ksft_test_result_fail("%s\n", testname);
else if (ret == NSID_ERROR)
ksft_exit_fail_msg("%s\n", testname);
else
ksft_test_result_skip("%s\n", testname);
}
/*
 * Wait for child @pid to terminate.
 *
 * Returns the child's exit status when it exited normally, or -1 when
 * waitpid() failed for a reason other than EINTR or the child did not
 * terminate through exit.
 */
static inline int wait_for_pid(pid_t pid)
{
	int status;

	/* Keep retrying for as long as the wait is merely interrupted. */
	while (waitpid(pid, &status, 0) == -1) {
		if (errno == EINTR)
			continue;
		ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
		return -1;
	}

	if (WIFEXITED(status))
		return WEXITSTATUS(status);

	ksft_print_msg("waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
		       WIFSIGNALED(status), WTERMSIG(status));
	return -1;
}
/*
 * Look up the id of the mount namespace referred to by the nsfs path
 * @mnt_ns (e.g. "/proc/self/ns/mnt") with the NS_GET_MNTNS_ID ioctl and
 * store it in *@mnt_ns_id.
 *
 * Returns NSID_PASS on success and NSID_ERROR when the path cannot be opened
 * or the ioctl fails.  The descriptor is closed on every path, and a failure
 * is reported immediately rather than after a delay.
 */
static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
{
	int ret = NSID_PASS;
	int fd;

	fd = open(mnt_ns, O_RDONLY);
	if (fd < 0) {
		ksft_print_msg("failed to open for ns %s: %s\n",
			       mnt_ns, strerror(errno));
		return NSID_ERROR;
	}

	if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
		/* Report before close() gets a chance to change errno. */
		ksft_print_msg("failed to get the nsid for ns %s: %s\n",
			       mnt_ns, strerror(errno));
		ret = NSID_ERROR;
	}

	close(fd);
	return ret;
}
/*
 * Fetch the unique mount id of @path through statx() and store it in
 * *@mnt_id.
 *
 * Returns NSID_PASS on success, NSID_ERROR when statx() fails or does not
 * report STATX_MNT_ID_UNIQUE in its result mask.
 */
static int get_mnt_id(const char *path, uint64_t *mnt_id)
{
	struct statx sx;

	if (statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx) == -1) {
		ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
			       strerror(errno));
		return NSID_ERROR;
	}

	/* The kernel signals what it actually filled in via stx_mask. */
	if ((sx.stx_mask & STATX_MNT_ID_UNIQUE) == 0) {
		ksft_print_msg("no unique mount ID available for %s\n", path);
		return NSID_ERROR;
	}

	*mnt_id = sx.stx_mnt_id;
	return NSID_PASS;
}
/*
 * Write the string @val (without its terminating NUL) to the existing file
 * @path in a single write() call.
 *
 * Returns NSID_PASS when the whole string was written and the file closed
 * cleanly, NSID_ERROR otherwise.  The descriptor is released on every path,
 * including write errors and short writes.
 */
static int write_file(const char *path, const char *val)
{
	size_t len = strlen(val);
	ssize_t written;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd == -1) {
		ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
		return NSID_ERROR;
	}

	/* write() returns ssize_t; keep the full width and sign. */
	written = write(fd, val, len);
	if (written == -1) {
		ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
		close(fd);
		return NSID_ERROR;
	}
	if ((size_t)written != len) {
		ksft_print_msg("short write to %s\n", path);
		close(fd);
		return NSID_ERROR;
	}

	if (close(fd) == -1) {
		ksft_print_msg("closing %s\n", path);
		return NSID_ERROR;
	}

	return NSID_PASS;
}
/*
 * Move the calling process into new mount, user and pid namespaces, write
 * uid/gid maps that map id 0 to the caller's pre-unshare uid/gid, and make
 * the whole mount tree private.
 *
 * A failing unshare() is reported through ksft_exit_fail_msg().  Any later
 * failure is returned as the helper's NSID_* code; NSID_PASS means every
 * step succeeded.
 */
static int setup_namespace(void)
{
	/* Sampled before unshare(); the maps below are built from these. */
	const uid_t uid = getuid();
	const gid_t gid = getgid();
	char uid_map[32], gid_map[32];
	/* Files are written strictly in this order. */
	const struct {
		const char *path;
		const char *val;
	} writes[] = {
		{ "/proc/self/uid_map",   uid_map },
		{ "/proc/self/setgroups", "deny"  },
		{ "/proc/self/gid_map",   gid_map },
	};
	size_t i;
	int ret;

	if (unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID) == -1)
		ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
				   strerror(errno));

	sprintf(uid_map, "0 %d 1", uid);
	sprintf(gid_map, "0 %d 1", gid);

	for (i = 0; i < sizeof(writes) / sizeof(writes[0]); i++) {
		ret = write_file(writes[i].path, writes[i].val);
		if (ret != NSID_PASS)
			return ret;
	}

	if (mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL) == -1) {
		ksft_print_msg("making mount tree private: %s\n",
			       strerror(errno));
		return NSID_ERROR;
	}

	return NSID_PASS;
}
/*
 * Compare the mount namespace id that statmount() reports for the root mount
 * with the id obtained from /proc/self/ns/mnt.
 *
 * Returns NSID_PASS when they agree, NSID_FAIL on a size or id mismatch,
 * NSID_SKIP when the returned mask is not exactly STATMOUNT_MNT_NS_ID, and
 * NSID_ERROR (or a helper's code) when a lookup fails.
 */
static int _test_statmount_mnt_ns_id(void)
{
	struct statmount sm;
	uint64_t mnt_ns_id, root_id;
	int ret;

	/* Reference value, obtained independently of statmount(). */
	ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
	if (ret != NSID_PASS)
		return ret;

	ret = get_mnt_id("/", &root_id);
	if (ret != NSID_PASS)
		return ret;

	if (statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0) == -1) {
		ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
		return NSID_ERROR;
	}

	/* No strings were requested, so the fixed-size struct is expected. */
	if (sm.size != sizeof(sm)) {
		ksft_print_msg("unexpected size: %u != %u\n", sm.size,
			       (uint32_t)sizeof(sm));
		return NSID_FAIL;
	}

	if (sm.mask != STATMOUNT_MNT_NS_ID) {
		ksft_print_msg("statmount mnt ns id unavailable\n");
		return NSID_SKIP;
	}

	if (sm.mnt_ns_id == mnt_ns_id)
		return NSID_PASS;

	ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
		       (unsigned long long)sm.mnt_ns_id,
		       (unsigned long long)mnt_ns_id);
	return NSID_FAIL;
}
/*
 * Run the statmount namespace-id check in a forked child that first moves
 * into fresh namespaces; the parent turns the child's exit status into a
 * kselftest result.
 */
static void test_statmount_mnt_ns_id(void)
{
	int ret;
	pid_t pid = fork();

	if (pid < 0)
		ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));

	if (pid == 0) {
		/* Child: the exit status carries the NSID_* verdict. */
		ret = setup_namespace();
		if (ret == NSID_PASS)
			ret = _test_statmount_mnt_ns_id();
		exit(ret);
	}

	/* We're the original pid, wait for the result. */
	ret = wait_for_pid(pid);
	handle_result(ret, "test statmount ns id");
}
/*
 * From outside the child's mount namespace, list the mounts of the namespace
 * that process @pid lives in and verify that
 *   - the number of mounts equals @child_nr_mounts (the count the child saw
 *     from the inside), and
 *   - statmount() on every listed mount reports that same namespace id.
 *
 * Returns NSID_PASS on success, NSID_FAIL on a mismatch, NSID_SKIP when the
 * kernel does not hand back the namespace id, and NSID_ERROR (or a helper's
 * code) when a lookup or system call fails.
 */
static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
{
	uint64_t list[256];
	uint64_t mnt_ns_id;
	ssize_t nr_mounts;
	char buf[256];
	int ret;

	/* Get the mount ns id for our child. */
	snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
	ret = get_mnt_ns_id(buf, &mnt_ns_id);
	if (ret != NSID_PASS)
		return ret;	/* mnt_ns_id was not filled in */

	nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list,
			      sizeof(list) / sizeof(list[0]), 0);
	if (nr_mounts == -1) {
		ksft_print_msg("listmount: %s\n", strerror(errno));
		return NSID_ERROR;
	}

	if ((uint64_t)nr_mounts != child_nr_mounts) {
		ksft_print_msg("listmount results is %llu != %llu\n",
			       (unsigned long long)nr_mounts,
			       (unsigned long long)child_nr_mounts);
		return NSID_FAIL;
	}

	/* Validate that all of our entries match our mnt_ns_id. */
	for (ssize_t i = 0; i < nr_mounts; i++) {
		struct statmount sm;

		ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
				sizeof(sm), 0);
		if (ret < 0) {
			ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
			return NSID_ERROR;
		}

		if (sm.mask != STATMOUNT_MNT_NS_ID) {
			ksft_print_msg("statmount mnt ns id unavailable\n");
			return NSID_SKIP;
		}

		if (sm.mnt_ns_id != mnt_ns_id) {
			ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
				       (unsigned long long)sm.mnt_ns_id,
				       (unsigned long long)mnt_ns_id);
			return NSID_FAIL;
		}
	}

	return NSID_PASS;
}
/*
 * Verify listmount()/statmount() against a foreign mount namespace.
 *
 * A forked child moves into fresh namespaces, counts its own mounts and sends
 * the count to the parent over child_ready_pipe.  It then blocks on
 * parent_ready_pipe so its namespace stays alive while the parent inspects it
 * from the outside with validate_external_listmount().
 *
 * The parent only validates when it actually received a count, never writes
 * to the child unless the child reported in, and closes its pipe ends before
 * reaping so that a still-blocked child sees end-of-file and exits.
 */
static void test_listmount_ns(void)
{
	uint64_t nr_mounts;
	char pval = 0;
	int child_ready_pipe[2];
	int parent_ready_pipe[2];
	pid_t pid;
	int ret, child_ret;

	if (pipe(child_ready_pipe) < 0)
		ksft_exit_fail_msg("failed to create the child pipe: %s\n",
				   strerror(errno));
	if (pipe(parent_ready_pipe) < 0)
		ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
				   strerror(errno));

	pid = fork();
	if (pid < 0)
		ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));

	if (pid == 0) {
		char cval;
		uint64_t list[256];

		close(child_ready_pipe[0]);
		close(parent_ready_pipe[1]);

		ret = setup_namespace();
		if (ret != NSID_PASS)
			exit(ret);

		nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
		if (nr_mounts == (uint64_t)-1) {
			ksft_print_msg("listmount: %s\n", strerror(errno));
			exit(NSID_FAIL);
		}

		/*
		 * Tell our parent how many mounts we have, and then wait for it
		 * to tell us we're done.
		 */
		if (write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)) !=
		    sizeof(nr_mounts))
			exit(NSID_ERROR);
		/* Either a byte or end-of-file means the parent is finished. */
		if (read(parent_ready_pipe[0], &cval, sizeof(cval)) < 0)
			exit(NSID_ERROR);
		exit(NSID_PASS);
	}

	close(child_ready_pipe[1]);
	close(parent_ready_pipe[0]);

	/* Wait until the child has created everything. */
	if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
	    sizeof(nr_mounts)) {
		/*
		 * No count arrived, so there is nothing to validate against,
		 * and the child may already be gone: do not write to it.
		 */
		ret = NSID_ERROR;
	} else {
		ret = validate_external_listmount(pid, nr_mounts);

		if (write(parent_ready_pipe[1], &pval, sizeof(pval)) !=
		    sizeof(pval))
			ret = NSID_ERROR;
	}

	/* Closing the write end also unblocks a child still in read(). */
	close(child_ready_pipe[0]);
	close(parent_ready_pipe[1]);

	child_ret = wait_for_pid(pid);
	if (child_ret != NSID_PASS)
		ret = child_ret;
	handle_result(ret, "test listmount ns id");
}
/*
 * Entry point: skip the whole suite when the kernel lacks statmount(),
 * otherwise run the two namespace tests and exit with the kselftest verdict.
 */
int main(void)
{
	int probe;

	ksft_print_header();

	/* An all-zero request with a NULL buffer is expected to fail. */
	probe = statmount(0, 0, 0, NULL, 0, 0);
	assert(probe == -1);
	if (errno == ENOSYS)
		ksft_exit_skip("statmount() syscall not supported\n");

	ksft_set_plan(2);
	test_statmount_mnt_ns_id();
	test_listmount_ns();

	if (ksft_get_fail_cnt() + ksft_get_error_cnt() <= 0)
		ksft_exit_pass();
	else
		ksft_exit_fail();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment