Commit e75cdf98 authored by Linus Torvalds

Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes and cleanups from Chris Mason:
 "Some of this got cherry-picked from a github repo this week, but I
  verified the patches.

  We have three small scrub cleanups and a collection of fixes"

* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: Use fs_info directly in btrfs_delete_unused_bgs
  btrfs: Fix lost-data-profile caused by balance bg
  btrfs: Fix lost-data-profile caused by auto removing bg
  btrfs: Remove len argument from scrub_find_csum
  btrfs: Reduce unnecessary arguments in scrub_recheck_block
  btrfs: Use scrub_checksum_data and scrub_checksum_tree_block for scrub_recheck_block_checksum
  btrfs: Reset sblock->xxx_error stats before calling scrub_recheck_block_checksum
  btrfs: scrub: setup all fields for sblock_to_check
  btrfs: scrub: set error stats when tree block spanning stripes
  Btrfs: fix race when listing an inode's xattrs
  Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow
  Btrfs: fix race leading to incorrect item deletion when dropping extents
  Btrfs: fix sleeping inside atomic context in qgroup rescan worker
  Btrfs: fix race waiting for qgroup rescan worker
  btrfs: qgroup: exit the rescan worker during umount
  Btrfs: fix extent accounting for partial direct IO writes
parents ca4ba96e d5f2e33b
...@@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root) ...@@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root)
fs_info->closing = 1; fs_info->closing = 1;
smp_mb(); smp_mb();
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info);
/* wait for the uuid_scan task to finish */ /* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem); down(&fs_info->uuid_tree_rescan_sem);
/* avoid complains from lockdep et al., set sem back to initial state */ /* avoid complains from lockdep et al., set sem back to initial state */
......
...@@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ...@@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
block_group = list_first_entry(&fs_info->unused_bgs, block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache, struct btrfs_block_group_cache,
bg_list); bg_list);
space_info = block_group->space_info;
list_del_init(&block_group->bg_list); list_del_init(&block_group->bg_list);
space_info = block_group->space_info;
if (ret || btrfs_mixed_space_info(space_info)) { if (ret || btrfs_mixed_space_info(space_info)) {
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);
continue; continue;
} }
spin_unlock(&fs_info->unused_bgs_lock); spin_unlock(&fs_info->unused_bgs_lock);
mutex_lock(&root->fs_info->delete_unused_bgs_mutex); mutex_lock(&fs_info->delete_unused_bgs_mutex);
/* Don't want to race with allocators so take the groups_sem */ /* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem); down_write(&space_info->groups_sem);
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (block_group->reserved || if (block_group->reserved ||
btrfs_block_group_used(&block_group->item) || btrfs_block_group_used(&block_group->item) ||
block_group->ro) { block_group->ro ||
list_is_singular(&block_group->list)) {
/* /*
* We want to bail if we made new allocations or have * We want to bail if we made new allocations or have
* outstanding allocations in this block group. We do * outstanding allocations in this block group. We do
...@@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ...@@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans: end_trans:
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
next: next:
mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); mutex_unlock(&fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group); btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock); spin_lock(&fs_info->unused_bgs_lock);
} }
......
...@@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, ...@@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
} }
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid > ino ||
key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) if (key.objectid > ino)
break;
if (WARN_ON_ONCE(key.objectid < ino) ||
key.type < BTRFS_EXTENT_DATA_KEY) {
ASSERT(del_nr == 0);
path->slots[0]++;
goto next_slot;
}
if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
break; break;
fi = btrfs_item_ptr(leaf, path->slots[0], fi = btrfs_item_ptr(leaf, path->slots[0],
...@@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, ...@@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
btrfs_file_extent_inline_len(leaf, btrfs_file_extent_inline_len(leaf,
path->slots[0], fi); path->slots[0], fi);
} else { } else {
WARN_ON(1); /* can't happen */
extent_end = search_start; BUG();
} }
/* /*
......
...@@ -1304,8 +1304,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, ...@@ -1304,8 +1304,14 @@ static noinline int run_delalloc_nocow(struct inode *inode,
num_bytes = 0; num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid > ino || if (found_key.objectid > ino)
found_key.type > BTRFS_EXTENT_DATA_KEY || break;
if (WARN_ON_ONCE(found_key.objectid < ino) ||
found_key.type < BTRFS_EXTENT_DATA_KEY) {
path->slots[0]++;
goto next_slot;
}
if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end) found_key.offset > end)
break; break;
...@@ -7503,6 +7509,28 @@ struct btrfs_dio_data { ...@@ -7503,6 +7509,28 @@ struct btrfs_dio_data {
u64 reserve; u64 reserve;
}; };
/*
 * Account for the extents needed by a direct IO chunk of @len bytes.
 *
 * @inode:    inode whose outstanding_extents counter may be adjusted
 * @dio_data: per-write direct IO state; its outstanding_extents field holds
 *            the part of the reservation that has not been consumed yet
 * @len:      length in bytes of the chunk being accounted
 *
 * The chunk needs one extent per BTRFS_MAX_EXTENT_SIZE bytes, rounded up.
 * Extents are taken from the remaining reservation in @dio_data first; only
 * the part that the reservation cannot cover is added to the inode counter,
 * under the inode's spinlock.
 */
static void adjust_dio_outstanding_extents(struct inode *inode,
					   struct btrfs_dio_data *dio_data,
					   const u64 len)
{
	unsigned num_extents;

	num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
					   BTRFS_MAX_EXTENT_SIZE);
	/*
	 * If the remaining reservation covers this chunk we are still within
	 * our reservation and only need to consume from it.
	 */
	if (dio_data->outstanding_extents >= num_extents) {
		dio_data->outstanding_extents -= num_extents;
	} else {
		/*
		 * The reservation is exhausted or only partially covers this
		 * chunk. Subtracting num_extents here would wrap the counter,
		 * so use up whatever is left and charge just the shortfall to
		 * the inode.
		 * NOTE(review): assumes dio_data->outstanding_extents is an
		 * unsigned counter - confirm against struct btrfs_dio_data.
		 */
		u64 num_needed = num_extents - dio_data->outstanding_extents;

		dio_data->outstanding_extents = 0;
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->outstanding_extents += num_needed;
		spin_unlock(&BTRFS_I(inode)->lock);
	}
}
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
...@@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ...@@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* If this errors out it's because we couldn't invalidate pagecache for * If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered. * this range and we need to fallback to buffered.
*/ */
if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
return -ENOTBLK; create)) {
ret = -ENOTBLK;
goto err;
}
em = btrfs_get_extent(inode, NULL, 0, start, len, 0); em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) { if (IS_ERR(em)) {
...@@ -7657,19 +7688,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ...@@ -7657,19 +7688,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (start + len > i_size_read(inode)) if (start + len > i_size_read(inode))
i_size_write(inode, start + len); i_size_write(inode, start + len);
/* adjust_dio_outstanding_extents(inode, dio_data, len);
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
if (dio_data->outstanding_extents) {
(dio_data->outstanding_extents)--;
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_free_reserved_data_space(inode, start, len); btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len); WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len; dio_data->reserve -= len;
...@@ -7696,8 +7715,17 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ...@@ -7696,8 +7715,17 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
unlock_err: unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS); unlock_bits, 1, 0, &cached_state, GFP_NOFS);
err:
if (dio_data) if (dio_data)
current->journal_info = dio_data; current->journal_info = dio_data;
/*
* Compensate the delalloc release we do in btrfs_direct_IO() when we
* write less data than expected, so that we don't underflow our inode's
* outstanding extents counter.
*/
if (create && dio_data)
adjust_dio_outstanding_extents(inode, dio_data, len);
return ret; return ret;
} }
......
...@@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, ...@@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
int slot; int slot;
int ret; int ret;
path->leave_spinning = 1;
mutex_lock(&fs_info->qgroup_rescan_lock); mutex_lock(&fs_info->qgroup_rescan_lock);
ret = btrfs_search_slot_for_read(fs_info->extent_root, ret = btrfs_search_slot_for_read(fs_info->extent_root,
&fs_info->qgroup_rescan_progress, &fs_info->qgroup_rescan_progress,
...@@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) ...@@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
goto out; goto out;
err = 0; err = 0;
while (!err) { while (!err && !btrfs_fs_closing(fs_info)) {
trans = btrfs_start_transaction(fs_info->fs_root, 0); trans = btrfs_start_transaction(fs_info->fs_root, 0);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); err = PTR_ERR(trans);
...@@ -2307,6 +2306,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) ...@@ -2307,6 +2306,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
btrfs_free_path(path); btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock); mutex_lock(&fs_info->qgroup_rescan_lock);
if (!btrfs_fs_closing(fs_info))
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (err > 0 && if (err > 0 &&
...@@ -2336,7 +2336,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) ...@@ -2336,7 +2336,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
} }
btrfs_end_transaction(trans, fs_info->quota_root); btrfs_end_transaction(trans, fs_info->quota_root);
if (err >= 0) { if (btrfs_fs_closing(fs_info)) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s", btrfs_info(fs_info, "qgroup scan completed%s",
err > 0 ? " (inconsistency flag cleared)" : ""); err > 0 ? " (inconsistency flag cleared)" : "");
} else { } else {
...@@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, ...@@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
memset(&fs_info->qgroup_rescan_progress, 0, memset(&fs_info->qgroup_rescan_progress, 0,
sizeof(fs_info->qgroup_rescan_progress)); sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid; fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
spin_unlock(&fs_info->qgroup_lock); spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock); mutex_unlock(&fs_info->qgroup_rescan_lock);
init_completion(&fs_info->qgroup_rescan_completion);
memset(&fs_info->qgroup_rescan_work, 0, memset(&fs_info->qgroup_rescan_work, 0,
sizeof(fs_info->qgroup_rescan_work)); sizeof(fs_info->qgroup_rescan_work));
btrfs_init_work(&fs_info->qgroup_rescan_work, btrfs_init_work(&fs_info->qgroup_rescan_work,
......
...@@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); ...@@ -248,14 +248,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock, static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck); struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info, static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock, int is_metadata,
int have_csum, u8 *csum, u64 generation,
u16 csum_size, int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock, struct scrub_block *sblock,
int is_metadata, int have_csum, int retry_failed_mirror);
const u8 *csum, u64 generation, static void scrub_recheck_block_checksum(struct scrub_block *sblock);
u16 csum_size);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good); struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
...@@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -889,11 +884,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
u64 length; u64 length;
u64 logical; u64 logical;
u64 generation;
unsigned int failed_mirror_index; unsigned int failed_mirror_index;
unsigned int is_metadata; unsigned int is_metadata;
unsigned int have_csum; unsigned int have_csum;
u8 *csum;
struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */ struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
struct scrub_block *sblock_bad; struct scrub_block *sblock_bad;
int ret; int ret;
...@@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -918,13 +911,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
} }
length = sblock_to_check->page_count * PAGE_SIZE; length = sblock_to_check->page_count * PAGE_SIZE;
logical = sblock_to_check->pagev[0]->logical; logical = sblock_to_check->pagev[0]->logical;
generation = sblock_to_check->pagev[0]->generation;
BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
is_metadata = !(sblock_to_check->pagev[0]->flags & is_metadata = !(sblock_to_check->pagev[0]->flags &
BTRFS_EXTENT_FLAG_DATA); BTRFS_EXTENT_FLAG_DATA);
have_csum = sblock_to_check->pagev[0]->have_csum; have_csum = sblock_to_check->pagev[0]->have_csum;
csum = sblock_to_check->pagev[0]->csum;
dev = sblock_to_check->pagev[0]->dev; dev = sblock_to_check->pagev[0]->dev;
if (sctx->is_dev_replace && !is_metadata && !have_csum) { if (sctx->is_dev_replace && !is_metadata && !have_csum) {
...@@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -987,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sblock_bad = sblocks_for_recheck + failed_mirror_index; sblock_bad = sblocks_for_recheck + failed_mirror_index;
/* build and submit the bios for the failed mirror, check checksums */ /* build and submit the bios for the failed mirror, check checksums */
scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, scrub_recheck_block(fs_info, sblock_bad, 1);
csum, generation, sctx->csum_size, 1);
if (!sblock_bad->header_error && !sblock_bad->checksum_error && if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
sblock_bad->no_io_error_seen) { sblock_bad->no_io_error_seen) {
...@@ -1101,9 +1091,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -1101,9 +1091,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sblock_other = sblocks_for_recheck + mirror_index; sblock_other = sblocks_for_recheck + mirror_index;
/* build and submit the bios, check checksums */ /* build and submit the bios, check checksums */
scrub_recheck_block(fs_info, sblock_other, is_metadata, scrub_recheck_block(fs_info, sblock_other, 0);
have_csum, csum, generation,
sctx->csum_size, 0);
if (!sblock_other->header_error && if (!sblock_other->header_error &&
!sblock_other->checksum_error && !sblock_other->checksum_error &&
...@@ -1215,9 +1203,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -1215,9 +1203,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* is verified, but most likely the data comes out * is verified, but most likely the data comes out
* of the page cache. * of the page cache.
*/ */
scrub_recheck_block(fs_info, sblock_bad, scrub_recheck_block(fs_info, sblock_bad, 1);
is_metadata, have_csum, csum,
generation, sctx->csum_size, 1);
if (!sblock_bad->header_error && if (!sblock_bad->header_error &&
!sblock_bad->checksum_error && !sblock_bad->checksum_error &&
sblock_bad->no_io_error_seen) sblock_bad->no_io_error_seen)
...@@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1318,6 +1304,9 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
u64 length = original_sblock->page_count * PAGE_SIZE; u64 length = original_sblock->page_count * PAGE_SIZE;
u64 logical = original_sblock->pagev[0]->logical; u64 logical = original_sblock->pagev[0]->logical;
u64 generation = original_sblock->pagev[0]->generation;
u64 flags = original_sblock->pagev[0]->flags;
u64 have_csum = original_sblock->pagev[0]->have_csum;
struct scrub_recover *recover; struct scrub_recover *recover;
struct btrfs_bio *bbio; struct btrfs_bio *bbio;
u64 sublen; u64 sublen;
...@@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1372,6 +1361,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
sblock = sblocks_for_recheck + mirror_index; sblock = sblocks_for_recheck + mirror_index;
sblock->sctx = sctx; sblock->sctx = sctx;
page = kzalloc(sizeof(*page), GFP_NOFS); page = kzalloc(sizeof(*page), GFP_NOFS);
if (!page) { if (!page) {
leave_nomem: leave_nomem:
...@@ -1383,7 +1373,15 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1383,7 +1373,15 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
} }
scrub_page_get(page); scrub_page_get(page);
sblock->pagev[page_index] = page; sblock->pagev[page_index] = page;
page->sblock = sblock;
page->flags = flags;
page->generation = generation;
page->logical = logical; page->logical = logical;
page->have_csum = have_csum;
if (have_csum)
memcpy(page->csum,
original_sblock->pagev[0]->csum,
sctx->csum_size);
scrub_stripe_index_and_offset(logical, scrub_stripe_index_and_offset(logical,
bbio->map_type, bbio->map_type,
...@@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, ...@@ -1474,15 +1472,12 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
* the pages that are errored in the just handled mirror can be repaired. * the pages that are errored in the just handled mirror can be repaired.
*/ */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info, static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_block *sblock, int is_metadata, struct scrub_block *sblock,
int have_csum, u8 *csum, u64 generation, int retry_failed_mirror)
u16 csum_size, int retry_failed_mirror)
{ {
int page_num; int page_num;
sblock->no_io_error_seen = 1; sblock->no_io_error_seen = 1;
sblock->header_error = 0;
sblock->checksum_error = 0;
for (page_num = 0; page_num < sblock->page_count; page_num++) { for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio; struct bio *bio;
...@@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, ...@@ -1518,9 +1513,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
} }
if (sblock->no_io_error_seen) if (sblock->no_io_error_seen)
scrub_recheck_block_checksum(fs_info, sblock, is_metadata, scrub_recheck_block_checksum(sblock);
have_csum, csum, generation,
csum_size);
return; return;
} }
...@@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[], ...@@ -1535,61 +1528,16 @@ static inline int scrub_check_fsid(u8 fsid[],
return !ret; return !ret;
} }
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, static void scrub_recheck_block_checksum(struct scrub_block *sblock)
struct scrub_block *sblock,
int is_metadata, int have_csum,
const u8 *csum, u64 generation,
u16 csum_size)
{ {
int page_num; sblock->header_error = 0;
u8 calculated_csum[BTRFS_CSUM_SIZE]; sblock->checksum_error = 0;
u32 crc = ~(u32)0; sblock->generation_error = 0;
void *mapped_buffer;
WARN_ON(!sblock->pagev[0]->page);
if (is_metadata) {
struct btrfs_header *h;
mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
h = (struct btrfs_header *)mapped_buffer;
if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
!scrub_check_fsid(h->fsid, sblock->pagev[0]) ||
memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE)) {
sblock->header_error = 1;
} else if (generation != btrfs_stack_header_generation(h)) {
sblock->header_error = 1;
sblock->generation_error = 1;
}
csum = h->csum;
} else {
if (!have_csum)
return;
mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
}
for (page_num = 0;;) { if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
if (page_num == 0 && is_metadata) scrub_checksum_data(sblock);
crc = btrfs_csum_data(
((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
else else
crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE); scrub_checksum_tree_block(sblock);
kunmap_atomic(mapped_buffer);
page_num++;
if (page_num >= sblock->page_count)
break;
WARN_ON(!sblock->pagev[page_num]->page);
mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
}
btrfs_csum_final(crc, calculated_csum);
if (memcmp(calculated_csum, csum, csum_size))
sblock->checksum_error = 1;
} }
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
...@@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock) ...@@ -1833,6 +1781,18 @@ static int scrub_checksum(struct scrub_block *sblock)
u64 flags; u64 flags;
int ret; int ret;
/*
* No need to initialize these stats currently,
* because this function only uses the return value
* instead of these stats values.
*
* Todo:
* always use stats
*/
sblock->header_error = 0;
sblock->generation_error = 0;
sblock->checksum_error = 0;
WARN_ON(sblock->page_count < 1); WARN_ON(sblock->page_count < 1);
flags = sblock->pagev[0]->flags; flags = sblock->pagev[0]->flags;
ret = 0; ret = 0;
...@@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) ...@@ -1858,7 +1818,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
struct page *page; struct page *page;
void *buffer; void *buffer;
u32 crc = ~(u32)0; u32 crc = ~(u32)0;
int fail = 0;
u64 len; u64 len;
int index; int index;
...@@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) ...@@ -1889,9 +1848,9 @@ static int scrub_checksum_data(struct scrub_block *sblock)
btrfs_csum_final(crc, csum); btrfs_csum_final(crc, csum);
if (memcmp(csum, on_disk_csum, sctx->csum_size)) if (memcmp(csum, on_disk_csum, sctx->csum_size))
fail = 1; sblock->checksum_error = 1;
return fail; return sblock->checksum_error;
} }
static int scrub_checksum_tree_block(struct scrub_block *sblock) static int scrub_checksum_tree_block(struct scrub_block *sblock)
...@@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) ...@@ -1907,8 +1866,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
u64 mapped_size; u64 mapped_size;
void *p; void *p;
u32 crc = ~(u32)0; u32 crc = ~(u32)0;
int fail = 0;
int crc_fail = 0;
u64 len; u64 len;
int index; int index;
...@@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) ...@@ -1923,19 +1880,20 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
* a) don't have an extent buffer and * a) don't have an extent buffer and
* b) the page is already kmapped * b) the page is already kmapped
*/ */
if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h)) if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
++fail; sblock->header_error = 1;
if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
++fail; sblock->header_error = 1;
sblock->generation_error = 1;
}
if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
++fail; sblock->header_error = 1;
if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE)) BTRFS_UUID_SIZE))
++fail; sblock->header_error = 1;
len = sctx->nodesize - BTRFS_CSUM_SIZE; len = sctx->nodesize - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
...@@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) ...@@ -1960,9 +1918,9 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
btrfs_csum_final(crc, calculated_csum); btrfs_csum_final(crc, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
++crc_fail; sblock->checksum_error = 1;
return fail || crc_fail; return sblock->header_error || sblock->checksum_error;
} }
static int scrub_checksum_super(struct scrub_block *sblock) static int scrub_checksum_super(struct scrub_block *sblock)
...@@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) ...@@ -2176,39 +2134,27 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
{ {
struct scrub_block *sblock = container_of(work, struct scrub_block, work); struct scrub_block *sblock = container_of(work, struct scrub_block, work);
struct scrub_ctx *sctx = sblock->sctx; struct scrub_ctx *sctx = sblock->sctx;
struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
unsigned int is_metadata;
unsigned int have_csum;
u8 *csum;
u64 generation;
u64 logical; u64 logical;
struct btrfs_device *dev; struct btrfs_device *dev;
is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA);
have_csum = sblock->pagev[0]->have_csum;
csum = sblock->pagev[0]->csum;
generation = sblock->pagev[0]->generation;
logical = sblock->pagev[0]->logical; logical = sblock->pagev[0]->logical;
dev = sblock->pagev[0]->dev; dev = sblock->pagev[0]->dev;
if (sblock->no_io_error_seen) { if (sblock->no_io_error_seen)
scrub_recheck_block_checksum(fs_info, sblock, is_metadata, scrub_recheck_block_checksum(sblock);
have_csum, csum, generation,
sctx->csum_size);
}
if (!sblock->no_io_error_seen) { if (!sblock->no_io_error_seen) {
spin_lock(&sctx->stat_lock); spin_lock(&sctx->stat_lock);
sctx->stat.read_errors++; sctx->stat.read_errors++;
spin_unlock(&sctx->stat_lock); spin_unlock(&sctx->stat_lock);
btrfs_err_rl_in_rcu(fs_info, btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
"IO error rebuilding logical %llu for dev %s", "IO error rebuilding logical %llu for dev %s",
logical, rcu_str_deref(dev->name)); logical, rcu_str_deref(dev->name));
} else if (sblock->header_error || sblock->checksum_error) { } else if (sblock->header_error || sblock->checksum_error) {
spin_lock(&sctx->stat_lock); spin_lock(&sctx->stat_lock);
sctx->stat.uncorrectable_errors++; sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock); spin_unlock(&sctx->stat_lock);
btrfs_err_rl_in_rcu(fs_info, btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
"failed to rebuild valid logical %llu for dev %s", "failed to rebuild valid logical %llu for dev %s",
logical, rcu_str_deref(dev->name)); logical, rcu_str_deref(dev->name));
} else { } else {
...@@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock) ...@@ -2500,8 +2446,7 @@ static void scrub_block_complete(struct scrub_block *sblock)
} }
} }
static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
u8 *csum)
{ {
struct btrfs_ordered_sum *sum = NULL; struct btrfs_ordered_sum *sum = NULL;
unsigned long index; unsigned long index;
...@@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, ...@@ -2565,7 +2510,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
if (flags & BTRFS_EXTENT_FLAG_DATA) { if (flags & BTRFS_EXTENT_FLAG_DATA) {
/* push csums to sbio */ /* push csums to sbio */
have_csum = scrub_find_csum(sctx, logical, l, csum); have_csum = scrub_find_csum(sctx, logical, csum);
if (have_csum == 0) if (have_csum == 0)
++sctx->stat.no_csum; ++sctx->stat.no_csum;
if (sctx->is_dev_replace && !have_csum) { if (sctx->is_dev_replace && !have_csum) {
...@@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, ...@@ -2703,7 +2648,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
if (flags & BTRFS_EXTENT_FLAG_DATA) { if (flags & BTRFS_EXTENT_FLAG_DATA) {
/* push csums to sbio */ /* push csums to sbio */
have_csum = scrub_find_csum(sctx, logical, l, csum); have_csum = scrub_find_csum(sctx, logical, csum);
if (have_csum == 0) if (have_csum == 0)
goto skip; goto skip;
} }
...@@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, ...@@ -3012,6 +2957,9 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
logic_start + map->stripe_len)) { logic_start + map->stripe_len)) {
btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu", btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
key.objectid, logic_start); key.objectid, logic_start);
spin_lock(&sctx->stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
goto next; goto next;
} }
again: again:
...@@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -3361,6 +3309,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
"scrub: tree block %llu spanning " "scrub: tree block %llu spanning "
"stripes, ignored. logical=%llu", "stripes, ignored. logical=%llu",
key.objectid, logical); key.objectid, logical);
spin_lock(&sctx->stat_lock);
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
goto next; goto next;
} }
......
...@@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) ...@@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u32 count_data = 0; u32 count_data = 0;
u32 count_meta = 0; u32 count_meta = 0;
u32 count_sys = 0; u32 count_sys = 0;
int chunk_reserved = 0;
/* step one make some room on all the devices */ /* step one make some room on all the devices */
devices = &fs_info->fs_devices->devices; devices = &fs_info->fs_devices->devices;
...@@ -3501,6 +3502,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) ...@@ -3501,6 +3502,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
ret = should_balance_chunk(chunk_root, leaf, chunk, ret = should_balance_chunk(chunk_root, leaf, chunk,
found_key.offset); found_key.offset);
btrfs_release_path(path); btrfs_release_path(path);
if (!ret) { if (!ret) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex); mutex_unlock(&fs_info->delete_unused_bgs_mutex);
...@@ -3537,6 +3539,25 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) ...@@ -3537,6 +3539,25 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
goto loop; goto loop;
} }
if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
trans = btrfs_start_transaction(chunk_root, 0);
if (IS_ERR(trans)) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
ret = PTR_ERR(trans);
goto error;
}
ret = btrfs_force_chunk_alloc(trans, chunk_root,
BTRFS_BLOCK_GROUP_DATA);
if (ret < 0) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
}
btrfs_end_transaction(trans, chunk_root);
chunk_reserved = 1;
}
ret = btrfs_relocate_chunk(chunk_root, ret = btrfs_relocate_chunk(chunk_root,
found_key.offset); found_key.offset);
mutex_unlock(&fs_info->delete_unused_bgs_mutex); mutex_unlock(&fs_info->delete_unused_bgs_mutex);
......
...@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
/* check to make sure this item is what we want */ /* check to make sure this item is what we want */
if (found_key.objectid != key.objectid) if (found_key.objectid != key.objectid)
break; break;
if (found_key.type != BTRFS_XATTR_ITEM_KEY) if (found_key.type > BTRFS_XATTR_ITEM_KEY)
break; break;
if (found_key.type < BTRFS_XATTR_ITEM_KEY)
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(root, leaf, di)) if (verify_dir_item(root, leaf, di))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment