Commit c00c5e1d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Fix some syzbot-detected bugs, as well as other bugs found by I/O
  injection testing.

  Change ext4's fallocate to consistently drop set[ug]id bits when a
  fallocate operation might possibly change the user-visible contents of
  a file.

  Also, improve handling of potentially invalid values in the
  s_overhead_cluster superblock field to avoid ext4 returning a negative
  number of free blocks"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: fix a potential race while discarding reserved buffers after an abort
  ext4: update the cached overhead value in the superblock
  ext4: force overhead calculation if the s_overhead_cluster makes no sense
  ext4: fix overhead calculation to account for the reserved gdt blocks
  ext4, doc: fix incorrect h_reserved size
  ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
  ext4: fix use-after-free in ext4_search_dir
  ext4: fix bug_on in start_this_handle during umount filesystem
  ext4: fix symlink file size not match to file content
  ext4: fix fallocate to use file_modified to update permissions consistently
parents 2e5991fa 23e3d7f7
......@@ -76,7 +76,7 @@ The beginning of an extended attribute block is in
- Checksum of the extended attribute block.
* - 0x14
- \_\_u32
- h\_reserved[2]
- h\_reserved[3]
- Zero.
The checksum is calculated against the FS UUID, the 64-bit block number
......
......@@ -2273,6 +2273,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
* Structure of a directory entry
*/
#define EXT4_NAME_LEN 255
/*
* Base length of the ext4 directory entry excluding the name length:
* sizeof(struct ext4_dir_entry_2) minus EXT4_NAME_LEN.
* NOTE(review): presumably this is the size of the fixed fields that
* precede the name; the struct layout is not visible here - confirm.
*/
#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN)
struct ext4_dir_entry {
__le32 inode; /* Inode number */
......@@ -3032,7 +3036,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
......@@ -3064,6 +3068,7 @@ int ext4_fileattr_set(struct user_namespace *mnt_userns,
struct dentry *dentry, struct fileattr *fa);
int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa);
extern void ext4_reset_inode_seed(struct inode *inode);
int ext4_update_overhead(struct super_block *sb);
/* migrate.c */
extern int ext4_ext_migrate(struct inode *);
......
......@@ -4500,9 +4500,9 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
return ret > 0 ? ret2 : ret;
}
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
static long ext4_zero_range(struct file *file, loff_t offset,
loff_t len, int mode)
......@@ -4574,6 +4574,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
ret = file_modified(file);
if (ret)
goto out_mutex;
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
......@@ -4690,7 +4694,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE) {
ret = ext4_punch_hole(inode, offset, len);
ret = ext4_punch_hole(file, offset, len);
goto exit;
}
......@@ -4699,12 +4703,12 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto exit;
if (mode & FALLOC_FL_COLLAPSE_RANGE) {
ret = ext4_collapse_range(inode, offset, len);
ret = ext4_collapse_range(file, offset, len);
goto exit;
}
if (mode & FALLOC_FL_INSERT_RANGE) {
ret = ext4_insert_range(inode, offset, len);
ret = ext4_insert_range(file, offset, len);
goto exit;
}
......@@ -4740,6 +4744,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
ret = file_modified(file);
if (ret)
goto out;
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
if (ret)
goto out;
......@@ -5241,8 +5249,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* This implements the fallocate's collapse range functionality for ext4
* Returns: 0 on success and non-zero on error.
*/
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
ext4_lblk_t punch_start, punch_stop;
......@@ -5294,6 +5303,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
/* Wait for existing dio to complete */
inode_dio_wait(inode);
ret = file_modified(file);
if (ret)
goto out_mutex;
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
......@@ -5387,8 +5400,9 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* by len bytes.
* Returns 0 on success, error otherwise.
*/
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
handle_t *handle;
......@@ -5445,6 +5459,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* Wait for existing dio to complete */
inode_dio_wait(inode);
ret = file_modified(file);
if (ret)
goto out_mutex;
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
......
......@@ -3953,12 +3953,14 @@ int ext4_break_layouts(struct inode *inode)
* Returns: 0 on success or negative on failure
*/
int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
ext4_lblk_t first_block, stop_block;
struct address_space *mapping = inode->i_mapping;
loff_t first_block_offset, last_block_offset;
loff_t first_block_offset, last_block_offset, max_length;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
unsigned int credits;
int ret = 0, ret2 = 0;
......@@ -4001,6 +4003,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
offset;
}
/*
* For punch hole the length + offset needs to be within one block
* before last range. Adjust the length if it goes beyond that limit.
*/
max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
if (offset + length > max_length)
length = max_length - offset;
if (offset & (sb->s_blocksize - 1) ||
(offset + length) & (sb->s_blocksize - 1)) {
/*
......@@ -4016,6 +4026,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
/* Wait all existing dio workers, newcomers will block on i_rwsem */
inode_dio_wait(inode);
ret = file_modified(file);
if (ret)
goto out_mutex;
/*
* Prevent page faults from reinstantiating pages we have released from
* page cache.
......
......@@ -1652,3 +1652,19 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
/*
 * Callback for ext4_update_superblocks_fn(): store the overhead value that
 * @arg points to into @es->s_overhead_clusters, converted with
 * cpu_to_le32().
 *
 * @es:  superblock structure to update
 * @arg: points to an unsigned long holding the overhead value; it is only
 *       read, never written
 */
static void set_overhead(struct ext4_super_block *es, const void *arg)
{
	/* Read through a const pointer instead of casting the qualifier away. */
	const unsigned long *overhead = arg;

	es->s_overhead_clusters = cpu_to_le32(*overhead);
}
/*
 * ext4_update_overhead - push the cached overhead value to the superblock
 * @sb: super block of the filesystem
 *
 * Returns 0 without doing anything when the filesystem is read-only, when
 * the in-memory overhead (s_overhead) is zero, or when it already equals
 * the s_overhead_clusters value stored in s_es.  Otherwise the value is
 * handed to ext4_update_superblocks_fn() with set_overhead() as the
 * callback, and that function's return value is passed back to the caller.
 */
int ext4_update_overhead(struct super_block *sb)
{
	struct ext4_sb_info *sb_info = EXT4_SB(sb);

	/* Never modify the superblock of a read-only filesystem. */
	if (sb_rdonly(sb))
		return 0;

	/* A zero overhead means there is no value to record. */
	if (sb_info->s_overhead == 0)
		return 0;

	/* The superblock already carries the same value. */
	if (le32_to_cpu(sb_info->s_es->s_overhead_clusters) ==
	    sb_info->s_overhead)
		return 0;

	return ext4_update_superblocks_fn(sb, set_overhead,
					  &sb_info->s_overhead);
}
......@@ -1466,10 +1466,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
if ((char *) de + de->name_len <= dlimit &&
if (de->name + de->name_len <= dlimit &&
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
......
......@@ -134,8 +134,10 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
if (bio->bi_status)
if (bio->bi_status) {
set_buffer_write_io_error(bh);
buffer_io_error(bh);
}
} while ((bh = bh->b_this_page) != head);
spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
if (!under_io) {
......
......@@ -1199,20 +1199,25 @@ static void ext4_put_super(struct super_block *sb)
int aborted = 0;
int i, err;
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
ext4_release_orphan_info(sb);
/*
* Unregister sysfs before destroying jbd2 journal.
* Since we could still access attr_journal_task attribute via sysfs
* path which could have sbi->s_journal->j_task as NULL
* Unregister sysfs before flushing sbi->s_error_work.
* A user may read /proc/fs/ext4/xx/mb_groups during umount; if
* metadata verification fails on that read, error work is queued.
* flush_stashed_error_work would then call start_this_handle, which
* may trigger a BUG_ON.
*/
ext4_unregister_sysfs(sb);
ext4_unregister_li_request(sb);
ext4_quota_off_umount(sb);
flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
ext4_release_orphan_info(sb);
if (sbi->s_journal) {
aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
......@@ -4172,9 +4177,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_fsblk_t first_block, last_block, b;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int s, j, count = 0;
int has_super = ext4_bg_has_super(sb, grp);
if (!ext4_has_feature_bigalloc(sb))
return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
return (has_super + ext4_bg_num_gdb(sb, grp) +
(has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
sbi->s_itb_per_group + 2);
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
......@@ -5282,9 +5289,18 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* Get the # of file system overhead blocks from the
* superblock if present.
*/
if (es->s_overhead_clusters)
sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
else {
/* ignore the precalculated value if it is ridiculous */
if (sbi->s_overhead > ext4_blocks_count(es))
sbi->s_overhead = 0;
/*
* If the bigalloc feature is not enabled recalculating the
* overhead doesn't take long, so we might as well just redo
* it to make sure we are using the correct value.
*/
if (!ext4_has_feature_bigalloc(sb))
sbi->s_overhead = 0;
if (sbi->s_overhead == 0) {
err = ext4_calculate_overhead(sb);
if (err)
goto failed_mount_wq;
......@@ -5602,6 +5618,8 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Quota mode: %s.", descr, ext4_quota_mode(sb));
/* Update the s_overhead_clusters if necessary */
ext4_update_overhead(sb);
return 0;
free_sbi:
......
......@@ -488,7 +488,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_wait_updates(journal);
commit_transaction->t_state = T_SWITCH;
write_unlock(&journal->j_state_lock);
J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers);
......@@ -508,6 +507,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* has reserved. This is consistent with the existing behaviour
* that multiple jbd2_journal_get_write_access() calls to the same
* buffer are perfectly permissible.
* We use journal->j_state_lock here to serialize processing of
* t_reserved_list with eviction of buffers from journal_unmap_buffer().
*/
while (commit_transaction->t_reserved_list) {
jh = commit_transaction->t_reserved_list;
......@@ -527,6 +528,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_refile_buffer(journal, jh);
}
write_unlock(&journal->j_state_lock);
/*
* Now try to drop any written-back buffers from the journal's
* checkpoint lists. We do this *before* commit because it potentially
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment