Commit 2c18a63b authored by Christian Brauner

super: wait until we passed kill super

Recent rework moved block device closing out of sb->put_super() and into
sb->kill_sb() to avoid deadlocks: s_umount is held while sb->put_super()
runs, and blkdev_put() can end up taking s_umount again.
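
Roughly, the old deadlock looked like this (a sketch, not an exact
trace; the precise path depends on the filesystem's ->put_super()):

	deactivate_locked_super()	/* takes sb->s_umount */
	  generic_shutdown_super()
	    sb->put_super()		/* s_umount still held */
	      blkdev_put()		/* may want s_umount again -> deadlock */

With the close moved into sb->kill_sb(), it runs after
generic_shutdown_super() has already dropped s_umount.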

That means we need to move the removal of the superblock from @fs_supers
out of generic_shutdown_super() and into deactivate_locked_super().
Otherwise concurrent mounters could fail to open block devices that are
still in use because blkdev_put() in sb->kill_sb() hasn't been called
yet.

We can now do this as we can make iterators through @fs_supers and
@super_blocks wait without holding s_umount. Concurrent mounts will wait
until a dying superblock is fully dead, i.e., until sb->kill_sb() has
been called and SB_DEAD has been set. Concurrent iterators can already
discard any SB_DYING superblock.
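
The SB_DEAD handshake uses the kernel's wait_var_event()/wake_up_var()
machinery. A minimal sketch of the pairing (the waker side is an
assumption inferred from the wait_dead() comment in the diff, which says
it pairs with a memory barrier in super_wake()):

	/* Waker, once sb->kill_sb() has completed (sketch of super_wake()): */
	smp_store_release(&sb->s_flags, sb->s_flags | SB_DEAD);
	smp_mb();	/* order the flag store against the waitqueue check */
	wake_up_var(&sb->s_flags);

	/* Waiter, as done in grab_super_dead() below: */
	wait_var_event(&sb->s_flags, smp_load_acquire(&sb->s_flags) & SB_DEAD);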
Reviewed-by: Jan Kara <jack@suse.cz>
Message-Id: <20230818-vfs-super-fixes-v3-v3-4-9f0b1876e46b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 5e874914
--- a/fs/super.c
+++ b/fs/super.c
@@ -153,7 +153,7 @@ static inline bool super_lock_excl(struct super_block *sb)
 }
 
 /* wake waiters */
-#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING)
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
 static void super_wake(struct super_block *sb, unsigned int flag)
 {
 	WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
@@ -461,6 +461,25 @@ void deactivate_locked_super(struct super_block *s)
 		list_lru_destroy(&s->s_dentry_lru);
 		list_lru_destroy(&s->s_inode_lru);
 
+		/*
+		 * Remove it from @fs_supers so it isn't found by new
+		 * sget{_fc}() walkers anymore. Any concurrent mounter still
+		 * managing to grab a temporary reference is guaranteed to
+		 * already see SB_DYING and will wait until we notify them about
+		 * SB_DEAD.
+		 */
+		spin_lock(&sb_lock);
+		hlist_del_init(&s->s_instances);
+		spin_unlock(&sb_lock);
+
+		/*
+		 * Let concurrent mounts know that this thing is really dead.
+		 * We don't need @sb->s_umount here as every concurrent caller
+		 * will see SB_DYING and either discard the superblock or wait
+		 * for SB_DEAD.
+		 */
+		super_wake(s, SB_DEAD);
+
 		put_filesystem(fs);
 		put_super(s);
 	} else {
@@ -517,6 +536,45 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 	return 0;
 }
 
+static inline bool wait_dead(struct super_block *sb)
+{
+	unsigned int flags;
+
+	/*
+	 * Pairs with memory barrier in super_wake() and ensures
+	 * that we see SB_DEAD after we're woken.
+	 */
+	flags = smp_load_acquire(&sb->s_flags);
+	return flags & SB_DEAD;
+}
+
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill_sb() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ * false if not.
+ */
+static bool grab_super_dead(struct super_block *sb)
+{
+	sb->s_count++;
+	if (grab_super(sb)) {
+		put_super(sb);
+		lockdep_assert_held(&sb->s_umount);
+		return true;
+	}
+	wait_var_event(&sb->s_flags, wait_dead(sb));
+	put_super(sb);
+	lockdep_assert_not_held(&sb->s_umount);
+	return false;
+}
+
 /*
  * super_trylock_shared - try to grab ->s_umount shared
  * @sb: reference we are trying to grab
@@ -643,15 +701,14 @@ void generic_shutdown_super(struct super_block *sb)
 			spin_unlock(&sb->s_inode_list_lock);
 		}
 	}
-	spin_lock(&sb_lock);
-	/* should be initialized for __put_super_and_need_restart() */
-	hlist_del_init(&sb->s_instances);
-	spin_unlock(&sb_lock);
 	/*
 	 * Broadcast to everyone that grabbed a temporary reference to this
 	 * superblock before we removed it from @fs_supers that the superblock
 	 * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
 	 * discard this superblock and treat it as dead.
+	 *
+	 * We leave the superblock on @fs_supers so it can be found by
+	 * sget{_fc}() until we passed sb->kill_sb().
 	 */
 	super_wake(sb, SB_DYING);
 	super_unlock_excl(sb);
@@ -746,7 +803,7 @@ struct super_block *sget_fc(struct fs_context *fc,
 		destroy_unused_super(s);
 		return ERR_PTR(-EBUSY);
 	}
-	if (!grab_super(old))
+	if (!grab_super_dead(old))
 		goto retry;
 	destroy_unused_super(s);
 	return old;
@@ -790,7 +847,7 @@ struct super_block *sget(struct file_system_type *type,
 		destroy_unused_super(s);
 		return ERR_PTR(-EBUSY);
 	}
-	if (!grab_super(old))
+	if (!grab_super_dead(old))
 		goto retry;
 	destroy_unused_super(s);
 	return old;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_LAZYTIME	BIT(25)	/* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define SB_DEAD		BIT(21)
 #define SB_DYING	BIT(24)
 #define SB_SUBMOUNT	BIT(26)
 #define SB_FORCE	BIT(27)
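
Taken together, a concurrent mounter in sget{_fc}() now behaves roughly
like this (a simplified sketch of the retry loop, not verbatim kernel
code):

	retry:
		spin_lock(&sb_lock);
		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
			if (!test(old, fc))
				continue;
			/*
			 * Drops sb_lock. Returns false only once the
			 * superblock is SB_DEAD, i.e. after sb->kill_sb()
			 * has closed the block device.
			 */
			if (!grab_super_dead(old))
				goto retry;
			return old;
		}
		/*
		 * No match: a dead superblock is already unhashed from
		 * @fs_supers, so setting up a new one can open the block
		 * device without racing against the old owner.
		 */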