Commit ba1da2f4 authored by Chris Mason

Btrfs: Don't pin pages in ram until the entire ordered extent is on disk.

Checksum items are not inserted until the entire ordered extent is on disk,
but individual pages might be clean and available for reclaim long before
the whole extent is on disk.

In order to allow those pages to be freed, we need to be able to search
the list of ordered extents to find the checksum that is going to be inserted
in the tree.  This way if the page needs to be read back in before
the checksums are in the btree, we'll be able to verify the checksum on
the page.

This commit adds the ability to search the pending ordered extents for
a given offset in the file, and changes btrfs_releasepage to allow
ordered pages to be freed.
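
To make the fallback concrete, here is a small, self-contained userspace sketch of the idea, not the kernel code: a pending ordered extent keeps one checksum per sector in memory, and a reader that misses in the committed checksum tree searches the pending list instead, computing the sector index the same way btrfs_find_ordered_sum() does in the diff below. The names pending_extent, find_pending_sum, and SECTORSIZE are illustrative only.

/* Toy model of the pending-checksum fallback -- not kernel code. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define SECTORSIZE 4096u	/* illustrative sector size */

struct pending_extent {
	uint64_t file_offset;		/* start of the extent in the file */
	uint64_t len;			/* length of the extent in bytes */
	const uint32_t *sums;		/* one crc per sector, cached in ram */
	struct pending_extent *next;	/* stand-in for the ordered extent list */
};

/* Returns 0 and fills *sum if a pending extent covers @offset, else 1. */
static int find_pending_sum(const struct pending_extent *list,
			    uint64_t offset, uint32_t *sum)
{
	for (; list; list = list->next) {
		if (offset >= list->file_offset &&
		    offset < list->file_offset + list->len) {
			/* same index calculation as btrfs_find_ordered_sum() */
			size_t index = (offset - list->file_offset) / SECTORSIZE;
			*sum = list->sums[index];
			return 0;
		}
	}
	return 1;
}

int main(void)
{
	uint32_t sums[2] = { 0xdeadbeef, 0xcafebabe };
	struct pending_extent e = { 8192, 2 * SECTORSIZE, sums, NULL };
	uint32_t csum;

	/* Offset 12288 falls in the second sector of the pending extent. */
	if (find_pending_sum(&e, 12288, &csum) == 0)
		printf("pending csum: 0x%08x\n", (unsigned)csum);
	return 0;
}

The point is only the ownership model: the sums stay attached to the in-memory ordered extent until btrfs_put_ordered_extent() drops the last reference, so a page can be released and read back before its csum items reach the btree.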
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent f9295749
@@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 	end_of_last_block = start_pos + num_bytes - 1;
 	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
 
-	trans = btrfs_join_transaction(root, 1);
+	trans = btrfs_start_transaction(root, 1);
 	if (!trans) {
 		err = -ENOMEM;
 		goto out_unlock;
...
@@ -382,7 +382,7 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	return btrfs_map_bio(root, rw, bio, mirror_num, 0);
 }
 
-static int add_pending_csums(struct btrfs_trans_handle *trans,
+static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 			     struct inode *inode, u64 file_offset,
 			     struct list_head *list)
 {
@@ -390,15 +390,12 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
 	struct btrfs_ordered_sum *sum;
 
 	btrfs_set_trans_block_group(trans, inode);
-	while(!list_empty(list)) {
-		cur = list->next;
+	list_for_each(cur, list) {
 		sum = list_entry(cur, struct btrfs_ordered_sum, list);
 		mutex_lock(&BTRFS_I(inode)->csum_mutex);
 		btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
 				       inode, sum);
 		mutex_unlock(&BTRFS_I(inode)->csum_mutex);
-		list_del(&sum->list);
-		kfree(sum);
 	}
 	return 0;
 }
@@ -498,9 +495,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 	int ret;
 
 	ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
-	if (!ret) {
+	if (!ret)
 		return 0;
-	}
 
 	trans = btrfs_join_transaction(root, 1);
@@ -571,6 +567,18 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
 	path = btrfs_alloc_path();
 	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
 	if (IS_ERR(item)) {
+		/*
+		 * It is possible there is an ordered extent that has
+		 * not yet finished for this range in the file.  If so,
+		 * that extent will have a csum cached, and it will insert
+		 * the sum after all the blocks in the extent are fully
+		 * on disk.  So, look for an ordered extent and use the
+		 * sum if found.
+		 */
+		ret = btrfs_find_ordered_sum(inode, start, &csum);
+		if (ret == 0)
+			goto found;
+
 		ret = PTR_ERR(item);
 		/* a csum that isn't present is a preallocated region. */
 		if (ret == -ENOENT || ret == -EFBIG)
@@ -582,6 +590,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
 	}
 	read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
 			   BTRFS_CRC32_SIZE);
+found:
 	set_state_private(io_tree, start, csum);
 out:
 	if (path)
@@ -888,7 +897,7 @@ static void fill_inode_item(struct extent_buffer *leaf,
 			    BTRFS_I(inode)->block_group->key.objectid);
 }
 
-int btrfs_update_inode(struct btrfs_trans_handle *trans,
+int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct inode *inode)
 {
@@ -1567,6 +1576,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
 			     inode->i_mapping, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
 			     inode->i_mapping, GFP_NOFS);
+	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->csum_mutex);
 	return 0;
 }
@@ -1868,6 +1878,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 			     inode->i_mapping, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
 			     inode->i_mapping, GFP_NOFS);
+	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->csum_mutex);
 	BTRFS_I(inode)->delalloc_bytes = 0;
 	BTRFS_I(inode)->disk_i_size = 0;
@@ -2097,6 +2108,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		BTRFS_I(inode)->delalloc_bytes = 0;
 		BTRFS_I(inode)->disk_i_size = 0;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+		btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
@@ -2618,14 +2630,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct btrfs_ordered_extent *ordered;
-
-	ordered = btrfs_lookup_ordered_extent(page->mapping->host,
-					      page_offset(page));
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		return 0;
-	}
 	return __btrfs_releasepage(page, gfp_flags);
 }
@@ -3078,6 +3082,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		BTRFS_I(inode)->delalloc_bytes = 0;
 		BTRFS_I(inode)->disk_i_size = 0;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+		btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
...
@@ -245,8 +245,18 @@ printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file
 int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 {
-	if (atomic_dec_and_test(&entry->refs))
+	struct list_head *cur;
+	struct btrfs_ordered_sum *sum;
+
+	if (atomic_dec_and_test(&entry->refs)) {
+		while(!list_empty(&entry->list)) {
+			cur = entry->list.next;
+			sum = list_entry(cur, struct btrfs_ordered_sum, list);
+			list_del(&sum->list);
+			kfree(sum);
+		}
 		kfree(entry);
+	}
 	return 0;
 }
@@ -444,8 +454,9 @@ int btrfs_ordered_update_i_size(struct inode *inode,
 	 * if we find an ordered extent then we can't update disk i_size
 	 * yet
 	 */
+	node = &ordered->rb_node;
 	while(1) {
-		node = rb_prev(&ordered->rb_node);
+		node = rb_prev(node);
 		if (!node)
 			break;
 		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
@@ -495,3 +506,36 @@ int btrfs_ordered_update_i_size(struct inode *inode,
 	mutex_unlock(&tree->mutex);
 	return 0;
 }
+
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
+{
+	struct btrfs_ordered_sum *ordered_sum;
+	struct btrfs_sector_sum *sector_sums;
+	struct btrfs_ordered_extent *ordered;
+	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+	struct list_head *cur;
+	int ret = 1;
+	int index;
+
+	ordered = btrfs_lookup_ordered_extent(inode, offset);
+	if (!ordered)
+		return 1;
+
+	mutex_lock(&tree->mutex);
+	list_for_each_prev(cur, &ordered->list) {
+		ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
+		if (offset >= ordered_sum->file_offset &&
+		    offset < ordered_sum->file_offset + ordered_sum->len) {
+			index = (offset - ordered_sum->file_offset) /
+				BTRFS_I(inode)->root->sectorsize;
+			sector_sums = &ordered_sum->sums;
+			*sum = sector_sums[index].sum;
+			ret = 0;
+			goto out;
+		}
+	}
+out:
+	mutex_unlock(&tree->mutex);
+	return ret;
+}
@@ -91,4 +91,5 @@ int btrfs_add_ordered_pending(struct inode *inode,
 			      u64 start, u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode,
 				struct btrfs_ordered_extent *ordered);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
 #endif