Commit 65ae6893 authored by Linus Torvalds

Merge tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fsync fixes: i_size for truncate vs fsync, DIO vs buffered during
   snapshotting, remove complicated but incomplete assertion

 - remove an excessive warning, fix misreported device stats updates

 - fix raid56 page mapping for 32-bit architectures

 - fixes for issues reported by a static analyzer

* tag 'for-5.1-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix assertion failure on fsync with NO_HOLES enabled
  btrfs: Avoid possible qgroup_rsv_size overflow in btrfs_calculate_inode_block_rsv_size
  btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks
  btrfs: raid56: properly unmap parity page in finish_parity_scrub()
  btrfs: don't report readahead errors and don't update statistics
  Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes
  btrfs: remove WARN_ON in log_dir_items
  Btrfs: fix incorrect file size after shrinking truncate and fsync
parents 26a3b01b 0ccc3876
@@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 	 *
 	 * This is overestimating in most cases.
 	 */
-	qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
+	qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
 	spin_lock(&block_rsv->lock);
 	block_rsv->size = reserve_size;
......
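The (u64) cast in this hunk matters because the multiplication is otherwise performed in 32-bit arithmetic and can wrap before the result is widened for the 64-bit assignment (assuming, as the cast implies, that both outstanding_extents and fs_info->nodesize are 32-bit values). A minimal userspace sketch of the effect, with purely illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t outstanding_extents = 300000;	/* hypothetical, just large enough to overflow */
	uint32_t nodesize = 16384;		/* a common btrfs nodesize */

	/* 32-bit multiply wraps before the assignment widens the result */
	uint64_t wrong = outstanding_extents * nodesize;
	/* casting one operand forces the multiplication into 64 bits */
	uint64_t right = (uint64_t)outstanding_extents * nodesize;

	printf("without cast: %llu\nwith cast:    %llu\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}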
@@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle *trans,
 	int i;
 	/* Level sanity check */
-	if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL ||
-	    root_level < 0 || root_level >= BTRFS_MAX_LEVEL ||
+	if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
+	    root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
 	    root_level < cur_level) {
 		btrfs_err_rl(fs_info,
 			"%s: bad levels, cur_level=%d root_level=%d",
......
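The tightened bound of BTRFS_MAX_LEVEL - 1 is an off-by-one guard: assuming the walk later dereferences the parent slot at cur_level + 1 (an assumption of this sketch, not something visible in the hunk itself), cur_level == BTRFS_MAX_LEVEL - 1 would index one past the end of an array sized BTRFS_MAX_LEVEL. A simplified illustration:

#include <stdio.h>

#define BTRFS_MAX_LEVEL 8

/* cur_level + 1 must remain a valid index into a nodes[BTRFS_MAX_LEVEL] array */
static int level_ok(int cur_level)
{
	return cur_level >= 0 && cur_level < BTRFS_MAX_LEVEL - 1;
}

int main(void)
{
	printf("cur_level 6: %s\n", level_ok(6) ? "ok" : "rejected");	/* nodes[7] is valid */
	printf("cur_level 7: %s\n", level_ok(7) ? "ok" : "rejected");	/* nodes[8] would be out of bounds */
	return 0;
}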
@@ -2430,8 +2430,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 			bitmap_clear(rbio->dbitmap, pagenr, 1);
 		kunmap(p);
-		for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+		for (stripe = 0; stripe < nr_data; stripe++)
 			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+		kunmap(p_page);
 	}
 	__free_page(p_page);
......
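This hunk is about keeping kmap()/kunmap() balanced, which is why the pull message calls it out for 32-bit (highmem) configurations: the old unmap loop walked rbio->real_stripes stripes even though only the nr_data data pages had been mapped through page_in_rbio(), while the separately mapped parity page p_page was never unmapped. A userspace sketch with stubbed map/unmap counters (helpers and values are illustrative, not kernel code):

#include <stdio.h>

static int mapped;	/* outstanding mappings; must return to 0 */

static void map_page(const char *what)   { mapped++; printf("map   %s\n", what); }
static void unmap_page(const char *what) { mapped--; printf("unmap %s\n", what); }

int main(void)
{
	int nr_data = 2;	/* data stripes that were actually mapped */
	int stripe;

	map_page("parity page");
	for (stripe = 0; stripe < nr_data; stripe++)
		map_page("data page");

	/* the fix: unmap exactly the nr_data pages that were mapped ... */
	for (stripe = 0; stripe < nr_data; stripe++)
		unmap_page("data page");
	/* ... and give the parity page its own unmap */
	unmap_page("parity page");

	printf("outstanding mappings: %d\n", mapped);	/* 0 means balanced */
	return 0;
}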
@@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
 	}
 }
-static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	/*
	 * We use writeback_inodes_sb here because if we used
	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
@@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
	 * from already being in a transaction and our join_transaction doesn't
	 * have to re-take the fs freeze lock.
	 */
-	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
 		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
+	} else {
+		struct btrfs_pending_snapshot *pending;
+		struct list_head *head = &trans->transaction->pending_snapshots;
+
+		/*
+		 * Flush delalloc for any root that is going to be snapshotted.
+		 * This is done to avoid a corrupted version of files, in the
+		 * snapshots, that had both buffered and direct IO writes (even
+		 * if they were done sequentially) due to an unordered update of
+		 * the inode's size on disk.
+		 */
+		list_for_each_entry(pending, head, list) {
+			int ret;
+
+			ret = btrfs_start_delalloc_snapshot(pending->root);
+			if (ret)
+				return ret;
+		}
+	}
 	return 0;
 }
-static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
+static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
 {
-	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+
+	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
 		btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
+	} else {
+		struct btrfs_pending_snapshot *pending;
+		struct list_head *head = &trans->transaction->pending_snapshots;
+
+		/*
+		 * Wait for any delalloc that we started previously for the roots
+		 * that are going to be snapshotted. This is to avoid a corrupted
+		 * version of files in the snapshots that had both buffered and
+		 * direct IO writes (even if they were done sequentially).
+		 */
+		list_for_each_entry(pending, head, list)
+			btrfs_wait_ordered_extents(pending->root,
+						   U64_MAX, 0, U64_MAX);
+	}
 }
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	extwriter_counter_dec(cur_trans, trans->type);
-	ret = btrfs_start_delalloc_flush(fs_info);
+	ret = btrfs_start_delalloc_flush(trans);
 	if (ret)
 		goto cleanup_transaction;
@@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	if (ret)
 		goto cleanup_transaction;
-	btrfs_wait_delalloc_flush(fs_info);
+	btrfs_wait_delalloc_flush(trans);
 	btrfs_scrub_pause(fs_info);
 	/*
......
@@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
 	}
 	btrfs_release_path(path);
-	/* find the first key from this transaction again */
+	/*
+	 * Find the first key from this transaction again. See the note for
+	 * log_new_dir_dentries, if we're logging a directory recursively we
+	 * won't be holding its i_mutex, which means we can modify the directory
+	 * while we're logging it. If we remove an entry between our first
+	 * search and this search we'll not find the key again and can just
+	 * bail.
+	 */
 	ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
-	if (WARN_ON(ret != 0))
+	if (ret != 0)
 		goto done;
 	/*
@@ -4544,6 +4551,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
 		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 				      struct btrfs_inode_item);
 		*size_ret = btrfs_inode_size(path->nodes[0], item);
+		/*
+		 * If the in-memory inode's i_size is smaller than the inode
+		 * size stored in the btree, return the inode's i_size, so
+		 * that we get a correct inode size after replaying the log
+		 * when before a power failure we had a shrinking truncate
+		 * followed by addition of a new name (rename / new hard link).
+		 * Otherwise return the inode size from the btree, to avoid
+		 * data loss when replaying a log due to previously doing a
+		 * write that expands the inode's size and logging a new name
+		 * immediately after.
+		 */
+		if (*size_ret > inode->vfs_inode.i_size)
+			*size_ret = inode->vfs_inode.i_size;
 	}
 	btrfs_release_path(path);
@@ -4705,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 				      struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(leaf, extent) ==
-		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_ram_bytes(leaf, extent);
-			ASSERT(len == i_size ||
-			       (len == fs_info->sectorsize &&
-				btrfs_file_extent_compression(leaf, extent) !=
-				BTRFS_COMPRESS_NONE) ||
-			       (len < i_size && i_size < fs_info->sectorsize));
+		    BTRFS_FILE_EXTENT_INLINE)
 			return 0;
-		}
 		len = btrfs_file_extent_num_bytes(leaf, extent);
 		/* Last extent goes beyond i_size, no need to log a hole. */
......
@@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio)
 			if (bio_op(bio) == REQ_OP_WRITE)
 				btrfs_dev_stat_inc_and_print(dev,
 						BTRFS_DEV_STAT_WRITE_ERRS);
-			else
+			else if (!(bio->bi_opf & REQ_RAHEAD))
 				btrfs_dev_stat_inc_and_print(dev,
 						BTRFS_DEV_STAT_READ_ERRS);
 			if (bio->bi_opf & REQ_PREFLUSH)
......