Commit 89092016 authored by Theodore Ts'o's avatar Theodore Ts'o Committed by Kelsey Skunberg

ext4: fix potential race between online resizing and write operations

BugLink: https://bugs.launchpad.net/bugs/1868628

commit 1d0c3924 upstream.

During an online resize an array of pointers to buffer heads gets
replaced so it can get enlarged.  If there is a racing block
allocation or deallocation which uses the old array, and the old array
has gotten reused this can lead to a GPF or some other random kernel
memory getting modified.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443
Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.eduReported-by: default avatarSuraj Jitindar Singh <surajjs@amazon.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.4.x
Cc: stable@kernel.org # 4.9.x
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
Signed-off-by: default avatarKhalid Elmously <khalid.elmously@canonical.com>
Signed-off-by: default avatarKelsey Skunberg <kelsey.skunberg@canonical.com>
parent b582debd
......@@ -279,6 +279,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct buffer_head *bh_p;
if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
......@@ -289,7 +290,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
/*
* sbi_array_rcu_deref returns with rcu unlocked, this is ok since
* the pointer being dereferenced won't be dereferenced again. By
* looking at the usage in add_new_gdb() the value isn't modified,
* just the pointer, and so it remains valid.
*/
if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
......@@ -297,10 +305,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
desc = (struct ext4_group_desc *)(
(__u8 *)sbi->s_group_desc[group_desc]->b_data +
(__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
*bh = sbi->s_group_desc[group_desc];
*bh = bh_p;
return desc;
}
......
......@@ -1303,7 +1303,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
......@@ -1491,6 +1491,23 @@ static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
inode->i_private = io;
}
/*
* Returns: sbi->field[index]
* Used to access an array element from the following sbi fields which require
* rcu protection to avoid dereferencing an invalid pointer due to reassignment
* - s_group_desc
* - s_group_info
* - s_flex_group
*/
#define sbi_array_rcu_deref(sbi, field, index) \
({ \
typeof(*((sbi)->field)) _v; \
rcu_read_lock(); \
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
rcu_read_unlock(); \
_v; \
})
/*
* Inode dynamic state flags
*/
......@@ -2558,6 +2575,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern int ext4_empty_dir(struct inode *inode);
/* resize.c */
extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,
......
......@@ -16,6 +16,33 @@
#include "ext4_jbd2.h"
struct ext4_rcu_ptr {
struct rcu_head rcu;
void *ptr;
};
static void ext4_rcu_ptr_callback(struct rcu_head *head)
{
struct ext4_rcu_ptr *ptr;
ptr = container_of(head, struct ext4_rcu_ptr, rcu);
kvfree(ptr->ptr);
kfree(ptr);
}
void ext4_kvfree_array_rcu(void *to_free)
{
struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
if (ptr) {
ptr->ptr = to_free;
call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
return;
}
synchronize_rcu();
kvfree(to_free);
}
int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -541,8 +568,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
gdb->b_size);
memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
......@@ -849,13 +876,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);
o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
......@@ -903,9 +932,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
BUFFER_TRACE(gdb_bh, "get_write_access");
......@@ -916,9 +947,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);
return err;
}
......@@ -1180,7 +1211,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
......@@ -1262,7 +1294,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
......@@ -1489,7 +1521,8 @@ static int ext4_flex_group_add(struct super_block *sb,
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
......
......@@ -798,6 +798,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head **group_desc;
int aborted = 0;
int i, err;
......@@ -829,9 +830,12 @@ static void ext4_put_super(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
......@@ -3259,7 +3263,7 @@ static void ext4_set_resv_clusters(struct super_block *sb)
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
......@@ -3828,9 +3832,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
sbi->s_group_desc = ext4_kvmalloc(db_count *
rcu_assign_pointer(sbi->s_group_desc,
ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
......@@ -3840,14 +3845,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
bgl_lock_init(sbi->s_blockgroup_lock);
for (i = 0; i < db_count; i++) {
struct buffer_head *bh;
block = descriptor_loc(sb, logical_sb_block, i);
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
if (!sbi->s_group_desc[i]) {
bh = sb_bread_unmovable(sb, block);
if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
rcu_read_lock();
rcu_dereference(sbi->s_group_desc)[i] = bh;
rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
......@@ -4212,9 +4222,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment