Commit c8585c6f authored by Daeho Jeong's avatar Daeho Jeong Committed by Theodore Ts'o

ext4: fix races between changing inode journal mode and ext4_writepages

In ext4, there is a race condition between changing inode journal mode
and ext4_writepages(). While ext4_writepages() is executed on a
non-journalled mode inode, the inode's journal mode could be enabled
by ioctl() and then, some pages dirtied after switching the journal
mode will be still exposed to ext4_writepages() in non-journaled mode.
To resolve this problem, we use fs-wide per-cpu rw semaphore by Jan
Kara's suggestion because we don't want to waste ext4_inode_info's
space for this extra rare case.
Signed-off-by: default avatarDaeho Jeong <daeho.jeong@samsung.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
parent 4c546592
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <crypto/hash.h> #include <crypto/hash.h>
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/compat.h> #include <linux/compat.h>
#endif #endif
...@@ -1508,6 +1509,9 @@ struct ext4_sb_info { ...@@ -1508,6 +1509,9 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state;
/* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem;
}; };
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......
...@@ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping, ...@@ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping,
struct blk_plug plug; struct blk_plug plug;
bool give_up_on_write = false; bool give_up_on_write = false;
percpu_down_read(&sbi->s_journal_flag_rwsem);
trace_ext4_writepages(inode, wbc); trace_ext4_writepages(inode, wbc);
if (dax_mapping(mapping)) if (dax_mapping(mapping)) {
return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
wbc); wbc);
goto out_writepages;
}
/* /*
* No pages to write? This is mainly a kludge to avoid starting * No pages to write? This is mainly a kludge to avoid starting
...@@ -2786,6 +2789,7 @@ static int ext4_writepages(struct address_space *mapping, ...@@ -2786,6 +2789,7 @@ static int ext4_writepages(struct address_space *mapping,
out_writepages: out_writepages:
trace_ext4_writepages_result(inode, wbc, ret, trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write); nr_to_write - wbc->nr_to_write);
percpu_up_read(&sbi->s_journal_flag_rwsem);
return ret; return ret;
} }
...@@ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
journal_t *journal; journal_t *journal;
handle_t *handle; handle_t *handle;
int err; int err;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
/* /*
* We have to be very careful here: changing a data block's * We have to be very careful here: changing a data block's
...@@ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
} }
} }
percpu_down_write(&sbi->s_journal_flag_rwsem);
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
/* /*
...@@ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
err = jbd2_journal_flush(journal); err = jbd2_journal_flush(journal);
if (err < 0) { if (err < 0) {
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode);
return err; return err;
} }
...@@ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_set_aops(inode); ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
if (val) if (val)
up_write(&EXT4_I(inode)->i_mmap_sem); up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode);
......
...@@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb) ...@@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb)
percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
brelse(sbi->s_sbh); brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++) for (i = 0; i < EXT4_MAXQUOTAS; i++)
...@@ -3930,6 +3931,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3930,6 +3931,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!err) if (!err)
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
GFP_KERNEL); GFP_KERNEL);
if (!err)
err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
if (err) { if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory"); ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount6; goto failed_mount6;
......
...@@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) ...@@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
free_percpu(brw->fast_read_ctr); free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */ brw->fast_read_ctr = NULL; /* catch use after free bugs */
} }
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
/* /*
* This is the fast-path for down_read/up_read. If it succeeds we rely * This is the fast-path for down_read/up_read. If it succeeds we rely
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment