Commit 0b32f4bb authored by Josef Bacik, committed by Chris Mason

Btrfs: ensure an entire eb is written at once

This patch simplifies how we track our extent buffers.  Previously we could exit
writepages having written only half of an extent buffer, which meant we had to
track the state of the pages and the state of the extent buffers separately.
Now we only read in entire extent buffers and write out entire extent buffers;
this allows us to simply set bits in our bflags to indicate the state of the eb,
and we no longer have to do things like track uptodate state with our io tree.  Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 5df4235e
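
To make the model described above concrete, here is a minimal sketch (not the actual kernel code) of what per-eb state tracking looks like: the buffer's state lives in atomic bit flags on the extent buffer itself, and a per-eb count of in-flight pages (io_pages, which replaces pages_reading in the diff below) decides when the whole buffer may be marked up to date. The struct and the completion helper are simplified stand-ins invented for this illustration.

#include <linux/atomic.h>
#include <linux/bitops.h>

/* flag bit numbers as in fs/btrfs/extent_io.h (IOERR is added by this patch) */
#define EXTENT_BUFFER_UPTODATE	0
#define EXTENT_BUFFER_IOERR	8

/* simplified stand-in for struct extent_buffer, illustration only */
struct sketch_eb {
	unsigned long bflags;	/* per-eb state bits, no io_tree involved */
	atomic_t io_pages;	/* pages of this buffer still under IO */
};

/* hypothetical per-page read-completion handler for the sketch */
static void sketch_eb_read_end_io(struct sketch_eb *eb, int err)
{
	if (err)
		set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);

	/* only the completion of the eb's last page decides its state */
	if (!atomic_dec_and_test(&eb->io_pages))
		return;

	if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags))
		clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	else
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}

Since the whole eb is now read or written in one go, there is no longer a point at which only some of its pages are valid, which is what previously forced uptodate state to be tracked per byte range in the io tree.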
fs/btrfs/disk-io.c

@@ -333,7 +333,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 	lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
 			 0, &cached_state, GFP_NOFS);
-	if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
+	if (extent_buffer_uptodate(eb) &&
 	    btrfs_header_generation(eb) == parent_transid) {
 		ret = 0;
 		goto out;
@@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 		       (unsigned long long)parent_transid,
 		       (unsigned long long)btrfs_header_generation(eb));
 	ret = 1;
-	clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
+	clear_extent_buffer_uptodate(eb);
 out:
 	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
 			     &cached_state, GFP_NOFS);
@@ -566,7 +566,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 	eb = (struct extent_buffer *)page->private;
 
-	reads_done = atomic_dec_and_test(&eb->pages_reading);
+	/* the pending IO might have been the only thing that kept this buffer
+	 * in memory.  Make sure we have a ref for all this other checks
+	 */
+	extent_buffer_get(eb);
+
+	reads_done = atomic_dec_and_test(&eb->io_pages);
 	if (!reads_done)
 		goto err;
@@ -606,14 +611,17 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 		ret = -EIO;
 	}
 
+	if (!ret)
+		set_extent_buffer_uptodate(eb);
 err:
 	if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
 		clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
 		btree_readahead_hook(root, eb, eb->start, ret);
 	}
 
-	if (ret && eb)
-		clear_extent_buffer_uptodate(tree, eb, NULL);
+	if (ret)
+		clear_extent_buffer_uptodate(eb);
+	free_extent_buffer(eb);
 out:
 	return ret;
 }
@@ -878,20 +886,6 @@ static int btree_migratepage(struct address_space *mapping,
 }
 #endif
 
-static int btree_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct extent_io_tree *tree;
-	tree = &BTRFS_I(page->mapping->host)->io_tree;
-
-	if (!(current->flags & PF_MEMALLOC)) {
-		return extent_write_full_page(tree, page,
-					      btree_get_extent, wbc);
-	}
-
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
 static int btree_writepages(struct address_space *mapping,
 			    struct writeback_control *wbc)
@@ -911,7 +905,7 @@ static int btree_writepages(struct address_space *mapping,
 		if (num_dirty < thresh)
 			return 0;
 	}
-	return extent_writepages(tree, mapping, btree_get_extent, wbc);
+	return btree_write_cache_pages(mapping, wbc);
 }
 
 static int btree_readpage(struct file *file, struct page *page)
@@ -950,15 +944,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 	}
 }
 
+static int btree_set_page_dirty(struct page *page)
+{
+	struct extent_buffer *eb;
+
+	BUG_ON(!PagePrivate(page));
+	eb = (struct extent_buffer *)page->private;
+	BUG_ON(!eb);
+	BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+	BUG_ON(!atomic_read(&eb->refs));
+	btrfs_assert_tree_locked(eb);
+	return __set_page_dirty_nobuffers(page);
+}
+
 static const struct address_space_operations btree_aops = {
 	.readpage	= btree_readpage,
-	.writepage	= btree_writepage,
 	.writepages	= btree_writepages,
 	.releasepage	= btree_releasepage,
 	.invalidatepage = btree_invalidatepage,
 #ifdef CONFIG_MIGRATION
 	.migratepage	= btree_migratepage,
 #endif
+	.set_page_dirty = btree_set_page_dirty,
 };
 
 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1001,7 +1008,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 	if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
 		free_extent_buffer(buf);
 		return -EIO;
-	} else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
+	} else if (extent_buffer_uptodate(buf)) {
 		*eb = buf;
 	} else {
 		free_extent_buffer(buf);
@@ -1054,9 +1061,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 		return NULL;
 
 	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
-	if (ret == 0)
-		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
 	return buf;
 
 }
@@ -1064,7 +1068,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		     struct extent_buffer *buf)
 {
-	struct inode *btree_inode = root->fs_info->btree_inode;
 	if (btrfs_header_generation(buf) ==
 	    root->fs_info->running_transaction->transid) {
 		btrfs_assert_tree_locked(buf);
@@ -1080,8 +1083,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		/* ugh, clear_extent_buffer_dirty needs to lock the page */
 		btrfs_set_lock_blocking(buf);
-		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
-					  buf);
+		clear_extent_buffer_dirty(buf);
 	}
 	return 0;
 }
@@ -1948,6 +1950,7 @@ int open_ctree(struct super_block *sb,
 	RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
 	extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
 			     fs_info->btree_inode->i_mapping);
+	BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
 	extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
 
 	BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
@@ -3058,8 +3061,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
 	int ret;
 	struct inode *btree_inode = buf->pages[0]->mapping->host;
 
-	ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
-				     NULL);
+	ret = extent_buffer_uptodate(buf);
 	if (!ret)
 		return ret;
@@ -3070,16 +3072,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
 
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
 {
-	struct inode *btree_inode = buf->pages[0]->mapping->host;
-	return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
-					  buf);
+	return set_extent_buffer_uptodate(buf);
 }
 
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 {
 	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
 	u64 transid = btrfs_header_generation(buf);
-	struct inode *btree_inode = root->fs_info->btree_inode;
 	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
@@ -3091,8 +3090,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 		       (unsigned long long)root->fs_info->generation);
 		WARN_ON(1);
 	}
-	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
-					    buf);
+	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty) {
 		spin_lock(&root->fs_info->delalloc_lock);
 		root->fs_info->dirty_metadata_bytes += buf->len;
@@ -3147,11 +3145,7 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 {
 	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
-	int ret;
-	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
-	if (ret == 0)
-		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
-	return ret;
+	return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
 }
 static int btree_lock_page_hook(struct page *page, void *data,
...
fs/btrfs/extent_io.c (this diff is collapsed in the web view and not shown here)
fs/btrfs/extent_io.h

@@ -37,6 +37,8 @@
 #define EXTENT_BUFFER_READAHEAD 4	/* this got triggered by readahead */
 #define EXTENT_BUFFER_TREE_REF 5
 #define EXTENT_BUFFER_STALE 6
+#define EXTENT_BUFFER_WRITEBACK 7
+#define EXTENT_BUFFER_IOERR 8
 
 /* these are flags for extent_clear_unlock_delalloc */
 #define EXTENT_CLEAR_UNLOCK_PAGE 0x1
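
The two new bits are consumed by the rewritten writeback path in fs/btrfs/extent_io.c, whose diff is collapsed above. As a hedged sketch of the usual pattern for per-eb writeback bits like these (the helper names and the wait-queue argument are invented for illustration and are not the actual btrfs functions):

#include <linux/bitops.h>
#include <linux/wait.h>

#define EXTENT_BUFFER_WRITEBACK	7	/* bit values from the hunk above */
#define EXTENT_BUFFER_IOERR	8

/* mark the whole eb as under writeback before its pages are submitted */
static void sketch_eb_start_writeback(unsigned long *bflags)
{
	clear_bit(EXTENT_BUFFER_IOERR, bflags);
	set_bit(EXTENT_BUFFER_WRITEBACK, bflags);
}

/* called once the last page of the eb has finished IO */
static void sketch_eb_end_writeback(unsigned long *bflags, int err,
				    wait_queue_head_t *waitq)
{
	if (err)
		set_bit(EXTENT_BUFFER_IOERR, bflags);
	/* real code needs the right memory barriers around these bit ops */
	clear_bit(EXTENT_BUFFER_WRITEBACK, bflags);
	wake_up(waitq);		/* let waiters see the buffer as clean again */
}

Because writeback state is now tracked per buffer rather than per page range, the writepages path can treat an extent buffer as a single unit and never submit only part of it.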
@@ -99,6 +101,7 @@ struct extent_io_tree {
 	struct radix_tree_root buffer;
 	struct address_space *mapping;
 	u64 dirty_bytes;
+	int track_uptodate;
 	spinlock_t lock;
 	spinlock_t buffer_lock;
 	struct extent_io_ops *ops;
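
The new track_uptodate field lets an io tree opt out of mirroring uptodate state per byte range now that extent buffers carry their own uptodate bit: open_ctree() above sets it to 0 for the btree inode, while btrfs_alloc_inode() (last hunk below) leaves it at 1 for regular inodes. A rough sketch of the kind of guard this enables in the collapsed extent_io.c paths; the helper is illustrative only and assumes the surrounding extent_io.c context (struct extent_io_tree and a set_extent_uptodate() range helper):

/*
 * Illustration only: when track_uptodate is 0 (the btree inode), the io
 * tree no longer records EXTENT_UPTODATE ranges; the per-eb bflags are
 * the single source of truth instead.
 */
static void sketch_mark_range_uptodate(struct extent_io_tree *tree,
				       u64 start, u64 end,
				       struct extent_state **cached)
{
	if (!tree->track_uptodate)
		return;		/* metadata: state lives in eb->bflags now */

	set_extent_uptodate(tree, start, end, cached, GFP_ATOMIC);
}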
@@ -132,7 +135,7 @@ struct extent_buffer {
 	struct extent_io_tree *tree;
 	spinlock_t refs_lock;
 	atomic_t refs;
-	atomic_t pages_reading;
+	atomic_t io_pages;
 	struct list_head leak_list;
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
@@ -249,6 +252,8 @@ int extent_writepages(struct extent_io_tree *tree,
 		      struct address_space *mapping,
 		      get_extent_t *get_extent,
 		      struct writeback_control *wbc);
+int btree_write_cache_pages(struct address_space *mapping,
+			    struct writeback_control *wbc);
 int extent_readpages(struct extent_io_tree *tree,
 		     struct address_space *mapping,
 		     struct list_head *pages, unsigned nr_pages,
@@ -297,18 +302,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 void memset_extent_buffer(struct extent_buffer *eb, char c,
 			  unsigned long start, unsigned long len);
 int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
-int clear_extent_buffer_dirty(struct extent_io_tree *tree,
-			      struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_io_tree *tree,
-			     struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_io_tree *tree,
-			       struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
-				 struct extent_buffer *eb,
-				 struct extent_state **cached_state);
-int extent_buffer_uptodate(struct extent_io_tree *tree,
-			   struct extent_buffer *eb,
-			   struct extent_state *cached_state);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_buffer *eb);
+int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_buffer *eb);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
 			      unsigned long min_len, char **map,
 			      unsigned long *map_start,
...
fs/btrfs/inode.c

@@ -6782,6 +6782,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	extent_map_tree_init(&ei->extent_tree);
 	extent_io_tree_init(&ei->io_tree, &inode->i_data);
 	extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
+	ei->io_tree.track_uptodate = 1;
+	ei->io_failure_tree.track_uptodate = 1;
 	mutex_init(&ei->log_mutex);
 	mutex_init(&ei->delalloc_mutex);
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
...