Commit fe55c452 authored by Mingming Cao's avatar Mingming Cao Committed by Linus Torvalds

[PATCH] ext3: remove unnecessary race then retry in ext3_get_block

The extra race-with-truncate-then-retry logic around
ext3_get_block_handle(), which was inherited from ext2, becomes unecessary
for ext3, since we have already obtained the ei->truncate_sem in
ext3_get_block_handle() before calling ext3_alloc_branch().  The
ei->truncate_sem is already there to block concurrent truncate and block
allocation on the same inode.  So the inode's indirect addressing tree
won't be changed after we grab that semaphore.

We could, after get the semaphore, re-verify the branch is up-to-date or
not.  If it has been changed, then get the updated branch.  If we still
need block allocation, we will have a safe version of the branch to work
with in the ext3_find_goal()/ext3_splice_branch().

The code becomes more readable after remove those retry logic.  The patch
also clean up some gotos in ext3_get_block_handle() to make it more
readable.
Signed-off-by: default avatarMingming Cao <cmm@us.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent faf8b249
...@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind) ...@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
* @goal: place to store the result. * @goal: place to store the result.
* *
* Normally this function find the prefered place for block allocation, * Normally this function find the prefered place for block allocation,
* stores it in *@goal and returns zero. If the branch had been changed * stores it in *@goal and returns zero.
* under us we return -EAGAIN.
*/ */
static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], static unsigned long ext3_find_goal(struct inode *inode, long block,
Indirect *partial, unsigned long *goal) Indirect chain[4], Indirect *partial)
{ {
struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info; struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info;
...@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], ...@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
*/ */
if (block_i && (block == block_i->last_alloc_logical_block + 1) if (block_i && (block == block_i->last_alloc_logical_block + 1)
&& (block_i->last_alloc_physical_block != 0)) { && (block_i->last_alloc_physical_block != 0)) {
*goal = block_i->last_alloc_physical_block + 1; return block_i->last_alloc_physical_block + 1;
return 0;
} }
if (verify_chain(chain, partial)) { return ext3_find_near(inode, partial);
*goal = ext3_find_near(inode, partial);
return 0;
}
return -EAGAIN;
} }
/** /**
...@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, ...@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
* @where: location of missing link * @where: location of missing link
* @num: number of blocks we are adding * @num: number of blocks we are adding
* *
* This function verifies that chain (up to the missing link) had not * This function fills the missing link and does all housekeeping needed in
* changed, fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full * inode (->i_blocks, etc.). In case of success we end up with the full
* chain to new block and return 0. Otherwise (== chain had been changed) * chain to new block and return 0.
* we free the new blocks (forgetting their buffer_heads, indeed) and
* return -EAGAIN.
*/ */
static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
...@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, ...@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
if (err) if (err)
goto err_out; goto err_out;
} }
/* Verify that place we are splicing to is still there and vacant */
if (!verify_chain(chain, where-1) || *where->p)
/* Writer: end */
goto changed;
/* That's it */ /* That's it */
*where->p = where->key; *where->p = where->key;
...@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, ...@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
} }
return err; return err;
changed:
/*
* AKPM: if where[i].bh isn't part of the current updating
* transaction then we explode nastily. Test this code path.
*/
jbd_debug(1, "the chain changed: try again\n");
err = -EAGAIN;
err_out: err_out:
for (i = 1; i < num; i++) { for (i = 1; i < num; i++) {
BUFFER_TRACE(where[i].bh, "call journal_forget"); BUFFER_TRACE(where[i].bh, "call journal_forget");
ext3_journal_forget(handle, where[i].bh); ext3_journal_forget(handle, where[i].bh);
} }
/* For the normal collision cleanup case, we free up the blocks.
* On genuine filesystem errors we don't even think about doing
* that. */
if (err == -EAGAIN)
for (i = 0; i < num; i++)
ext3_free_blocks(handle, inode,
le32_to_cpu(where[i].key), 1);
return err; return err;
} }
...@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, ...@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
unsigned long goal; unsigned long goal;
int left; int left;
int boundary = 0; int boundary = 0;
int depth = ext3_block_to_path(inode, iblock, offsets, &boundary); const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
struct ext3_inode_info *ei = EXT3_I(inode); struct ext3_inode_info *ei = EXT3_I(inode);
J_ASSERT(handle != NULL || create == 0); J_ASSERT(handle != NULL || create == 0);
...@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, ...@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
if (depth == 0) if (depth == 0)
goto out; goto out;
reread:
partial = ext3_get_branch(inode, depth, offsets, chain, &err); partial = ext3_get_branch(inode, depth, offsets, chain, &err);
/* Simplest case - block found, no allocation needed */ /* Simplest case - block found, no allocation needed */
if (!partial) { if (!partial) {
clear_buffer_new(bh_result); clear_buffer_new(bh_result);
got_it: goto got_it;
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
if (boundary)
set_buffer_boundary(bh_result);
/* Clean up and exit */
partial = chain+depth-1; /* the whole chain */
goto cleanup;
} }
/* Next simple case - plain lookup or failed read of indirect block */ /* Next simple case - plain lookup or failed read of indirect block */
if (!create || err == -EIO) { if (!create || err == -EIO)
cleanup: goto cleanup;
down(&ei->truncate_sem);
/*
* If the indirect block is missing while we are reading
* the chain(ext3_get_branch() returns -EAGAIN err), or
* if the chain has been changed after we grab the semaphore,
* (either because another process truncated this branch, or
* another get_block allocated this branch) re-grab the chain to see if
* the request block has been allocated or not.
*
* Since we already block the truncate/other get_block
* at this point, we will have the current copy of the chain when we
* splice the branch into the tree.
*/
if (err == -EAGAIN || !verify_chain(chain, partial)) {
while (partial > chain) { while (partial > chain) {
BUFFER_TRACE(partial->bh, "call brelse");
brelse(partial->bh); brelse(partial->bh);
partial--; partial--;
} }
BUFFER_TRACE(bh_result, "returned"); partial = ext3_get_branch(inode, depth, offsets, chain, &err);
out: if (!partial) {
return err; up(&ei->truncate_sem);
if (err)
goto cleanup;
clear_buffer_new(bh_result);
goto got_it;
}
} }
/* /*
* Indirect block might be removed by truncate while we were * Okay, we need to do block allocation. Lazily initialize the block
* reading it. Handling of that case (forget what we've got and * allocation info here if necessary
* reread) is taken out of the main path.
*/ */
if (err == -EAGAIN) if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
goto changed;
goal = 0;
down(&ei->truncate_sem);
/* lazy initialize the block allocation info here if necessary */
if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) {
ext3_init_block_alloc_info(inode); ext3_init_block_alloc_info(inode);
}
if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) { goal = ext3_find_goal(inode, iblock, chain, partial);
up(&ei->truncate_sem);
goto changed;
}
left = (chain + depth) - partial; left = (chain + depth) - partial;
...@@ -771,38 +742,45 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock, ...@@ -771,38 +742,45 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
* Block out ext3_truncate while we alter the tree * Block out ext3_truncate while we alter the tree
*/ */
err = ext3_alloc_branch(handle, inode, left, goal, err = ext3_alloc_branch(handle, inode, left, goal,
offsets+(partial-chain), partial); offsets + (partial - chain), partial);
/* The ext3_splice_branch call will free and forget any buffers /*
* The ext3_splice_branch call will free and forget any buffers
* on the new chain if there is a failure, but that risks using * on the new chain if there is a failure, but that risks using
* up transaction credits, especially for bitmaps where the * up transaction credits, especially for bitmaps where the
* credits cannot be returned. Can we handle this somehow? We * credits cannot be returned. Can we handle this somehow? We
* may need to return -EAGAIN upwards in the worst case. --sct */ * may need to return -EAGAIN upwards in the worst case. --sct
*/
if (!err) if (!err)
err = ext3_splice_branch(handle, inode, iblock, chain, err = ext3_splice_branch(handle, inode, iblock, chain,
partial, left); partial, left);
/* i_disksize growing is protected by truncate_sem /*
* don't forget to protect it if you're about to implement * i_disksize growing is protected by truncate_sem. Don't forget to
* concurrent ext3_get_block() -bzzz */ * protect it if you're about to implement concurrent
* ext3_get_block() -bzzz
*/
if (!err && extend_disksize && inode->i_size > ei->i_disksize) if (!err && extend_disksize && inode->i_size > ei->i_disksize)
ei->i_disksize = inode->i_size; ei->i_disksize = inode->i_size;
up(&ei->truncate_sem); up(&ei->truncate_sem);
if (err == -EAGAIN)
goto changed;
if (err) if (err)
goto cleanup; goto cleanup;
set_buffer_new(bh_result); set_buffer_new(bh_result);
goto got_it; got_it:
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
changed: if (boundary)
set_buffer_boundary(bh_result);
/* Clean up and exit */
partial = chain + depth - 1; /* the whole chain */
cleanup:
while (partial > chain) { while (partial > chain) {
jbd_debug(1, "buffer chain changed, retrying\n"); BUFFER_TRACE(partial->bh, "call brelse");
BUFFER_TRACE(partial->bh, "brelsing");
brelse(partial->bh); brelse(partial->bh);
partial--; partial--;
} }
goto reread; BUFFER_TRACE(bh_result, "returned");
out:
return err;
} }
static int ext3_get_block(struct inode *inode, sector_t iblock, static int ext3_get_block(struct inode *inode, sector_t iblock,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment