Commit 4a096752 authored by Chris Mason

Btrfs: Data ordered fixes

* In btrfs_delete_inode, wait for ordered extents after calling
truncate_inode_pages.  This is much faster, and more correct.

* Properly clear out the PageChecked bit everywhere we redirty the page.

* Change the writepage fixup handler to lock the page range and check to
see if an ordered extent had been inserted since the improperly dirtied
page was discovered.

* Wait for ordered extents outside the transaction.  This isn't required
for locking rules but does improve transaction latencies.

* Reduce contention on the alloc_mutex by dropping it while incrementing
refs on a node/leaf and while dropping refs on a leaf.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent e5a2217e
...@@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (!root->ref_cows) if (!root->ref_cows)
return 0; return 0;
mutex_lock(&root->fs_info->alloc_mutex);
level = btrfs_header_level(buf); level = btrfs_header_level(buf);
nritems = btrfs_header_nritems(buf); nritems = btrfs_header_nritems(buf);
for (i = 0; i < nritems; i++) { for (i = 0; i < nritems; i++) {
...@@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
if (disk_bytenr == 0) if (disk_bytenr == 0)
continue; continue;
mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
btrfs_file_extent_disk_num_bytes(buf, fi), btrfs_file_extent_disk_num_bytes(buf, fi),
root->root_key.objectid, trans->transid, root->root_key.objectid, trans->transid,
key.objectid, key.offset); key.objectid, key.offset);
mutex_unlock(&root->fs_info->alloc_mutex);
if (ret) { if (ret) {
faili = i; faili = i;
WARN_ON(1);
goto fail; goto fail;
} }
} else { } else {
bytenr = btrfs_node_blockptr(buf, i); bytenr = btrfs_node_blockptr(buf, i);
btrfs_node_key_to_cpu(buf, &key, i); btrfs_node_key_to_cpu(buf, &key, i);
mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_inc_extent_ref(trans, root, bytenr, ret = __btrfs_inc_extent_ref(trans, root, bytenr,
btrfs_level_size(root, level - 1), btrfs_level_size(root, level - 1),
root->root_key.objectid, root->root_key.objectid,
trans->transid, trans->transid,
level - 1, key.objectid); level - 1, key.objectid);
mutex_unlock(&root->fs_info->alloc_mutex);
if (ret) { if (ret) {
faili = i; faili = i;
WARN_ON(1);
goto fail; goto fail;
} }
} }
} }
mutex_unlock(&root->fs_info->alloc_mutex);
return 0; return 0;
fail: fail:
WARN_ON(1); WARN_ON(1);
...@@ -1004,7 +1010,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -1004,7 +1010,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} }
} }
#endif #endif
mutex_unlock(&root->fs_info->alloc_mutex);
return ret; return ret;
} }
...@@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ...@@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
leaf_owner = btrfs_header_owner(leaf); leaf_owner = btrfs_header_owner(leaf);
leaf_generation = btrfs_header_generation(leaf); leaf_generation = btrfs_header_generation(leaf);
mutex_unlock(&root->fs_info->alloc_mutex);
for (i = 0; i < nritems; i++) { for (i = 0; i < nritems; i++) {
u64 disk_bytenr; u64 disk_bytenr;
...@@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, ...@@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == 0) if (disk_bytenr == 0)
continue; continue;
mutex_lock(&root->fs_info->alloc_mutex);
ret = __btrfs_free_extent(trans, root, disk_bytenr, ret = __btrfs_free_extent(trans, root, disk_bytenr,
btrfs_file_extent_disk_num_bytes(leaf, fi), btrfs_file_extent_disk_num_bytes(leaf, fi),
leaf_owner, leaf_generation, leaf_owner, leaf_generation,
key.objectid, key.offset, 0); key.objectid, key.offset, 0);
mutex_unlock(&root->fs_info->alloc_mutex);
BUG_ON(ret); BUG_ON(ret);
} }
mutex_lock(&root->fs_info->alloc_mutex);
return 0; return 0;
} }
......
...@@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) ...@@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
if (!pages[i]) if (!pages[i])
break; break;
ClearPageChecked(pages[i]);
unlock_page(pages[i]); unlock_page(pages[i]);
mark_page_accessed(pages[i]); mark_page_accessed(pages[i]);
page_cache_release(pages[i]); page_cache_release(pages[i]);
......
...@@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) ...@@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
fixup = container_of(work, struct btrfs_writepage_fixup, work); fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page; page = fixup->page;
again:
lock_page(page); lock_page(page);
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
ClearPageChecked(page); ClearPageChecked(page);
...@@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work) ...@@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) /* already ordered? We're done */
if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
EXTENT_ORDERED, 0)) {
goto out; goto out;
}
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
page_end, GFP_NOFS);
unlock_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
goto again;
}
set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
GFP_NOFS); GFP_NOFS);
...@@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode) ...@@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode)
unsigned long nr; unsigned long nr;
int ret; int ret;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode)) { if (is_bad_inode(inode)) {
goto no_delete; goto no_delete;
} }
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_i_size_write(inode, 0); btrfs_i_size_write(inode, 0);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
...@@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) ...@@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
1, 1, GFP_NOFS); 1, 1, GFP_NOFS);
__btrfs_releasepage(page, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
if (PagePrivate(page)) { if (PagePrivate(page)) {
invalidate_extent_lru(tree, page_offset(page), invalidate_extent_lru(tree, page_offset(page),
PAGE_CACHE_SIZE); PAGE_CACHE_SIZE);
...@@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode) ...@@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode)
return; return;
btrfs_truncate_page(inode->i_mapping, inode->i_size); btrfs_truncate_page(inode->i_mapping, inode->i_size);
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode); btrfs_set_trans_block_group(trans, inode);
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
btrfs_i_size_write(inode, inode->i_size); btrfs_i_size_write(inode, inode->i_size);
/* FIXME, add redo link to tree so we don't leak on crash */ /* FIXME, add redo link to tree so we don't leak on crash */
......
...@@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) ...@@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
orig_end = start + len - 1; orig_end = start + len - 1;
wait_end = orig_end; wait_end = orig_end;
} }
again:
/* start IO across the range first to instantiate any delalloc /* start IO across the range first to instantiate any delalloc
* extents * extents
*/ */
...@@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) ...@@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
break; break;
end--; end--;
} }
if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
printk("inode %lu still ordered or delalloc after wait "
"%llu %llu\n", inode->i_ino,
(unsigned long long)start,
(unsigned long long)orig_end);
goto again;
}
} }
/* /*
...@@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) ...@@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
sector_sums = &ordered_sum->sums; sector_sums = &ordered_sum->sums;
for (i = 0; i < num_sectors; i++) { for (i = 0; i < num_sectors; i++) {
if (sector_sums[i].offset == offset) { if (sector_sums[i].offset == offset) {
printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset);
*sum = sector_sums[i].sum; *sum = sector_sums[i].sum;
ret = 0; ret = 0;
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment