Commit 29fa89d0 authored by Aneesh Kumar K.V, committed by Theodore Ts'o

ext4: Mark the unwritten buffer_head as mapped during write_begin

Setting BH_Unwritten buffer_heads as BH_Mapped avoids multiple
(unnecessary) calls to get_block() during the call to the write(2)
system call.  Setting BH_Unwritten buffer heads as BH_Mapped requires
that the writepages() functions can handle BH_Unwritten buffer_heads.

After this commit, things work as follows:

ext4_ext_get_block() returns an unmapped, unwritten buffer head when
called with create = 0 for prealloc space. This makes sure we handle
the read path and the non-delayed allocation case correctly.  Even though
the buffer head is marked unmapped, we have valid b_blocknr and b_bdev
values in the buffer_head.

ext4_da_get_block_prep() called for block reservation will now return a
mapped, unwritten, new buffer_head for prealloc space. This avoids
multiple calls to get_block() for writes to the same offset. By marking
such buffers as BH_New, we also ensure that sub-block zeroing of buffered
writes happens correctly.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 8fb0e342
...@@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
if (create == EXT4_CREATE_UNINITIALIZED_EXT) if (create == EXT4_CREATE_UNINITIALIZED_EXT)
goto out; goto out;
if (!create) { if (!create) {
if (allocated > max_blocks)
allocated = max_blocks;
/* /*
* We have blocks reserved already. We * We have blocks reserved already. We
* return allocated blocks so that delalloc * return allocated blocks so that delalloc
...@@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* the buffer head will be unmapped so that * the buffer head will be unmapped so that
* a read from the block returns 0s. * a read from the block returns 0s.
*/ */
if (allocated > max_blocks)
allocated = max_blocks;
set_buffer_unwritten(bh_result); set_buffer_unwritten(bh_result);
bh_result->b_bdev = inode->i_sb->s_bdev; bh_result->b_bdev = inode->i_sb->s_bdev;
bh_result->b_blocknr = newblock; bh_result->b_blocknr = newblock;
......
...@@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) ...@@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
* @logical - first logical block to start assignment with * @logical - first logical block to start assignment with
* *
* the function goes through all passed space and put actual disk * the function goes through all passed space and put actual disk
* block numbers into buffer heads, dropping BH_Delay * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
*/ */
static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
struct buffer_head *exbh) struct buffer_head *exbh)
...@@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, ...@@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
do { do {
if (cur_logical >= logical + blocks) if (cur_logical >= logical + blocks)
break; break;
if (buffer_delay(bh)) {
bh->b_blocknr = pblock; if (buffer_delay(bh) ||
clear_buffer_delay(bh); buffer_unwritten(bh)) {
bh->b_bdev = inode->i_sb->s_bdev;
} else if (buffer_unwritten(bh)) { BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
bh->b_blocknr = pblock;
clear_buffer_unwritten(bh); if (buffer_delay(bh)) {
set_buffer_mapped(bh); clear_buffer_delay(bh);
set_buffer_new(bh); bh->b_blocknr = pblock;
bh->b_bdev = inode->i_sb->s_bdev; } else {
/*
* unwritten already should have
* blocknr assigned. Verify that
*/
clear_buffer_unwritten(bh);
BUG_ON(bh->b_blocknr != pblock);
}
} else if (buffer_mapped(bh)) } else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock); BUG_ON(bh->b_blocknr != pblock);
...@@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) ...@@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* We consider only non-mapped and non-allocated blocks * We consider only non-mapped and non-allocated blocks
*/ */
if ((mpd->b_state & (1 << BH_Mapped)) && if ((mpd->b_state & (1 << BH_Mapped)) &&
!(mpd->b_state & (1 << BH_Delay))) !(mpd->b_state & (1 << BH_Delay)) &&
!(mpd->b_state & (1 << BH_Unwritten)))
return 0; return 0;
/* /*
* We need to make sure the BH_Delay flag is passed down to * We need to make sure the BH_Delay flag is passed down to
...@@ -2205,6 +2214,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, ...@@ -2205,6 +2214,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
return; return;
} }
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
{
/*
* unmapped buffer is possible for holes.
* delay buffer is possible with delayed allocation.
* We also need to consider unwritten buffer as unmapped.
*/
return (!buffer_mapped(bh) || buffer_delay(bh) ||
buffer_unwritten(bh)) && buffer_dirty(bh);
}
/* /*
* __mpage_da_writepage - finds extent of pages and blocks * __mpage_da_writepage - finds extent of pages and blocks
* *
...@@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page, ...@@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page,
* Otherwise we won't make progress * Otherwise we won't make progress
* with the page in ext4_da_writepage * with the page in ext4_da_writepage
*/ */
if (buffer_dirty(bh) && if (ext4_bh_unmapped_or_delay(NULL, bh)) {
(!buffer_mapped(bh) || buffer_delay(bh))) {
mpage_add_bh_to_extent(mpd, logical, mpage_add_bh_to_extent(mpd, logical,
bh->b_size, bh->b_size,
bh->b_state); bh->b_state);
...@@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page, ...@@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page,
/* /*
* this is a special callback for ->write_begin() only * this is a special callback for ->write_begin() only
* it's intention is to return mapped block or reserve space * it's intention is to return mapped block or reserve space
*
* For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
* We also have b_blocknr = -1 and b_bdev initialized properly
*
* For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
* We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
* initialized properly.
*
*/ */
static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
...@@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, ...@@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
set_buffer_delay(bh_result); set_buffer_delay(bh_result);
} else if (ret > 0) { } else if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits); bh_result->b_size = (ret << inode->i_blkbits);
/* if (buffer_unwritten(bh_result)) {
* With sub-block writes into unwritten extents /* A delayed write to unwritten bh should
* we also need to mark the buffer as new so that * be marked new and mapped. Mapped ensures
* the unwritten parts of the buffer gets correctly zeroed. * that we don't do get_block multiple times
*/ * when we write to the same offset and new
if (buffer_unwritten(bh_result)) * ensures that we do proper zero out for
* partial write.
*/
set_buffer_new(bh_result); set_buffer_new(bh_result);
set_buffer_mapped(bh_result);
}
ret = 0; ret = 0;
} }
return ret; return ret;
} }
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
{
/*
* unmapped buffer is possible for holes.
* delay buffer is possible with delayed allocation
*/
return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
}
static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
...@@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, ...@@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
for (i = 0; i < idx; i++) for (i = 0; i < idx; i++)
bh = bh->b_this_page; bh = bh->b_this_page;
if (!buffer_mapped(bh) || (buffer_delay(bh))) if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
return 0; return 0;
return 1; return 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment