Commit 2eea9ce4 authored by Miklos Szeredi, committed by Christian Brauner

mounts: keep list of mounts in an rbtree

When adding a mount to a namespace insert it into an rbtree rooted in the
mnt_namespace instead of a linear list.

The mnt.mnt_list is still used to set up the mount tree and for
propagation, but not after the mount has been added to a namespace.  Hence
mnt_list can live in union with rb_node.  Use MNT_ONRB mount flag to
validate that the mount is on the correct list.

This allows removing the cursor used for reading /proc/$PID/mountinfo.  The
mnt_id_unique of the next mount can be used as an index into the seq file.

Tested by inserting 100k bind mounts, unsharing the mount namespace, and
unmounting.  No performance regressions have been observed.

For the last mount in the 100k list the statmount() call was more than 100x
faster due to the mount ID lookup not having to do a linear search.  This
patch makes the overhead of mount ID lookup non-observable in this range.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20231025140205.3586473-3-mszeredi@redhat.com
Reviewed-by: Ian Kent <raven@themaw.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 98d2b430
......@@ -8,19 +8,13 @@
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
/*
* Traversal and modification of .list is protected by either
* - taking namespace_sem for write, OR
* - taking namespace_sem for read AND taking .ns_lock.
*/
struct list_head list;
spinlock_t ns_lock;
struct rb_root mounts; /* Protected by namespace_sem */
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
unsigned int mounts; /* # of mounts in the namespace */
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
} __randomize_layout;
......@@ -55,7 +49,10 @@ struct mount {
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
union {
struct rb_node mnt_node; /* Under ns->mounts */
struct list_head mnt_list;
};
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
......@@ -128,7 +125,6 @@ struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
struct mount cursor;
};
extern const struct seq_operations mounts_op;
......@@ -147,4 +143,12 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
return ns->seq == 0;
}
/*
 * move_from_ns - take a mount off its namespace's rbtree and queue it on a list
 * @mnt:     mount currently linked into mnt->mnt_ns->mounts via mnt_node
 * @dt_list: list head that receives the mount (appended at the tail)
 *
 * mnt_node and mnt_list share storage in a union inside struct mount, so a
 * mount can be linked through only one of them at a time.  MNT_ONRB records
 * which member is live; it is cleared here because the mount leaves the tree.
 *
 * NOTE(review): ns->mounts is annotated "Protected by namespace_sem";
 * presumably callers hold it -- confirm against the call sites.
 */
static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
{
/* The mount is expected to be on the rbtree; warn if the flag says otherwise. */
WARN_ON(!(mnt->mnt.mnt_flags & MNT_ONRB));
mnt->mnt.mnt_flags &= ~MNT_ONRB;
/* Unlink from the tree first: list_add_tail() below reuses the same union storage. */
rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts);
list_add_tail(&mnt->mnt_list, dt_list);
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
This diff is collapsed.
......@@ -468,7 +468,7 @@ static void umount_one(struct mount *mnt, struct list_head *to_umount)
mnt->mnt.mnt_flags |= MNT_UMOUNT;
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_umounting);
list_move_tail(&mnt->mnt_list, to_umount);
move_from_ns(mnt, to_umount);
}
/*
......
......@@ -283,8 +283,6 @@ static int mounts_open_common(struct inode *inode, struct file *file,
p->ns = ns;
p->root = root;
p->show = show;
INIT_LIST_HEAD(&p->cursor.mnt_list);
p->cursor.mnt.mnt_flags = MNT_CURSOR;
return 0;
......@@ -301,7 +299,6 @@ static int mounts_release(struct inode *inode, struct file *file)
struct seq_file *m = file->private_data;
struct proc_mounts *p = m->private;
path_put(&p->root);
mnt_cursor_del(p->ns, &p->cursor);
put_mnt_ns(p->ns);
return seq_release_private(inode, file);
}
......
......@@ -50,8 +50,7 @@ struct path;
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
MNT_CURSOR)
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | MNT_ONRB)
#define MNT_INTERNAL 0x4000
......@@ -65,7 +64,7 @@ struct path;
#define MNT_SYNC_UMOUNT 0x2000000
#define MNT_MARKED 0x4000000
#define MNT_UMOUNT 0x8000000
#define MNT_CURSOR 0x10000000
#define MNT_ONRB 0x10000000
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment