Commit a6d40408 authored by Theodore Ts'o

Merge branch 'jk/jbd2-revoke-overflow'

parents 0d0a60c9 19014d69
......@@ -2609,7 +2609,6 @@ extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
......@@ -3303,6 +3302,10 @@ extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
ext4_lblk_t lblk2, ext4_lblk_t count,
int mark_unwritten,int *err);
extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
int check_cred, int restart_cred,
int revoke_cred);
/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
......
......@@ -65,12 +65,14 @@ static int ext4_journal_check_start(struct super_block *sb)
}
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
int type, int blocks, int rsv_blocks)
int type, int blocks, int rsv_blocks,
int revoke_creds)
{
journal_t *journal;
int err;
trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
_RET_IP_);
err = ext4_journal_check_start(sb);
if (err < 0)
return ERR_PTR(err);
......@@ -78,8 +80,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
journal = EXT4_SB(sb)->s_journal;
if (!journal)
return ext4_get_nojournal();
return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
type, line);
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
GFP_NOFS, type, line);
}
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
......@@ -119,8 +121,8 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
return ext4_get_nojournal();
sb = handle->h_journal->j_private;
trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
_RET_IP_);
trace_ext4_journal_start_reserved(sb,
jbd2_handle_buffer_credits(handle), _RET_IP_);
err = ext4_journal_check_start(sb);
if (err < 0) {
jbd2_journal_free_reserved(handle);
......@@ -133,6 +135,19 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
return handle;
}
/*
 * Try to make sure @handle holds at least @check_cred buffer credits and
 * @revoke_cred revoke credits without restarting the transaction.
 *
 * Nothing is done (and 0 is returned) for a handle that ext4_handle_valid()
 * rejects or that already holds both amounts. Otherwise the handle is
 * extended by whatever is missing to reach @extend_cred buffer credits and
 * @revoke_cred revoke credits (never by a negative amount) and the result of
 * ext4_journal_extend() is returned unchanged.
 */
int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
				  int extend_cred, int revoke_cred)
{
	int missing_buffer, missing_revoke;

	if (!ext4_handle_valid(handle) ||
	    (jbd2_handle_buffer_credits(handle) >= check_cred &&
	     handle->h_revoke_credits >= revoke_cred))
		return 0;

	missing_buffer = max(0, extend_cred - jbd2_handle_buffer_credits(handle));
	missing_revoke = max(0, revoke_cred - handle->h_revoke_credits);

	return ext4_journal_extend(handle, missing_buffer, missing_revoke);
}
static void ext4_journal_abort_handle(const char *caller, unsigned int line,
const char *err_fn,
struct buffer_head *bh,
......@@ -278,7 +293,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
handle->h_type,
handle->h_line_no,
handle->h_requested_credits,
handle->h_buffer_credits, err);
jbd2_handle_buffer_credits(handle), err);
return err;
}
ext4_error_inode(inode, where, line,
......@@ -289,7 +304,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
handle->h_type,
handle->h_line_no,
handle->h_requested_credits,
handle->h_buffer_credits, err);
jbd2_handle_buffer_credits(handle),
err);
}
} else {
if (inode)
......
......@@ -261,7 +261,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
int type, int blocks, int rsv_blocks);
int type, int blocks, int rsv_blocks,
int revoke_creds);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
......@@ -288,28 +289,41 @@ static inline int ext4_handle_is_aborted(handle_t *handle)
return 0;
}
static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
static inline int ext4_free_metadata_revoke_credits(struct super_block *sb,
int blocks)
{
if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
return 0;
return 1;
/* Freeing each metadata block can result in freeing one cluster */
return blocks * EXT4_SB(sb)->s_cluster_ratio;
}
/*
 * Revoke credits the ext4_journal_start*() wrappers request by default:
 * whatever ext4_free_metadata_revoke_credits() reports for freeing 8
 * metadata blocks on @sb.
 */
static inline int ext4_trans_default_revoke_credits(struct super_block *sb)
{
	int default_freed_blocks = 8;

	return ext4_free_metadata_revoke_credits(sb, default_freed_blocks);
}
#define ext4_journal_start_sb(sb, type, nblocks) \
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0, \
ext4_trans_default_revoke_credits(sb))
#define ext4_journal_start(inode, type, nblocks) \
__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0, \
ext4_trans_default_revoke_credits((inode)->i_sb))
#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\
__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\
ext4_trans_default_revoke_credits((inode)->i_sb))
#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
#define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \
__ext4_journal_start((inode), __LINE__, (type), (blocks), 0, \
(revoke_creds))
static inline handle_t *__ext4_journal_start(struct inode *inode,
unsigned int line, int type,
int blocks, int rsv_blocks)
int blocks, int rsv_blocks,
int revoke_creds)
{
return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
rsv_blocks);
rsv_blocks, revoke_creds);
}
#define ext4_journal_stop(handle) \
......@@ -332,20 +346,68 @@ static inline handle_t *ext4_journal_current_handle(void)
return journal_current_handle();
}
static inline int ext4_journal_extend(handle_t *handle, int nblocks)
static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke)
{
if (ext4_handle_valid(handle))
return jbd2_journal_extend(handle, nblocks);
return jbd2_journal_extend(handle, nblocks, revoke);
return 0;
}
static inline int ext4_journal_restart(handle_t *handle, int nblocks)
static inline int ext4_journal_restart(handle_t *handle, int nblocks,
int revoke)
{
if (ext4_handle_valid(handle))
return jbd2_journal_restart(handle, nblocks);
return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS);
return 0;
}
int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
int extend_cred, int revoke_cred);
/*
 * Ensure @handle has at least @check_cred buffer credits and @revoke_cred
 * revoke credits available. If it does not, __ext4_journal_ensure_credits()
 * tries to extend the handle so it reaches @extend_cred buffer credits; when
 * that call returns a positive value the transaction is restarted with
 * @extend_cred buffer credits and @revoke_cred revoke credits instead.
 *
 * @fn is an expression, not a function pointer. It is evaluated only on the
 * restart path, right before the restart, to allow for cleanup. A negative
 * result from @fn skips the restart and becomes the result of the macro;
 * any other result is ignored. Pass 0 when no cleanup is needed.
 *
 * Because this is a macro, @handle, @extend_cred and @revoke_cred are
 * expanded twice (once for the extend attempt, once for the restart) and
 * must be free of side effects. The macro also declares a local named
 * 'err', which hides any caller variable of that name inside @fn.
 *
 * The value of the expression is < 0 in case of error, 0 in case the handle
 * has enough credits or transaction extension succeeded, 1 in case the
 * transaction had to be restarted. A non-zero result of
 * ext4_journal_restart() is passed through as-is.
 */
#define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred, \
revoke_cred, fn) \
({ \
__label__ __ensure_end; \
int err = __ext4_journal_ensure_credits((handle), (check_cred), \
(extend_cred), (revoke_cred)); \
\
if (err <= 0) \
goto __ensure_end; \
err = (fn); \
if (err < 0) \
goto __ensure_end; \
err = ext4_journal_restart((handle), (extend_cred), (revoke_cred)); \
if (err == 0) \
err = 1; \
__ensure_end: \
err; \
})
/*
 * Ensure @handle has at least @credits buffer credits and @revoke_creds
 * revoke credits available, restarting the transaction if needed. The same
 * @credits value is used both as the check threshold and as the amount
 * requested on extend/restart, and no cleanup expression runs before a
 * restart.
 *
 * Only @revoke_creds is forwarded; a caller that wants the default revoke
 * reservation has to pass ext4_trans_default_revoke_credits(sb) itself.
 *
 * Returns the value of ext4_journal_ensure_credits_fn(): < 0 on error, 0 if
 * no restart was needed, 1 if the transaction was restarted.
 */
static inline int ext4_journal_ensure_credits(handle_t *handle, int credits,
					      int revoke_creds)
{
	int ret;

	ret = ext4_journal_ensure_credits_fn(handle, credits, credits,
					     revoke_creds, 0);
	return ret;
}
static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
if (EXT4_JOURNAL(inode) != NULL)
......@@ -407,6 +469,7 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
!test_opt(inode->i_sb, DELALLOC))) {
......@@ -437,6 +500,19 @@ static inline int ext4_should_writeback_data(struct inode *inode)
return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}
static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
{
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
return 0;
if (!ext4_should_journal_data(inode))
return 0;
/*
* Data blocks in one extent are contiguous, just account for partial
* clusters at extent boundaries
*/
return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1);
}
/*
* This function controls whether or not we should try to go down the
* dioread_nolock code paths, which makes it safe to avoid taking
......
......@@ -100,29 +100,41 @@ static int ext4_split_extent_at(handle_t *handle,
static int ext4_find_delayed_extent(struct inode *inode,
struct extent_status *newes);
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
{
int err;
if (!ext4_handle_valid(handle))
return 0;
if (handle->h_buffer_credits >= needed)
return 0;
/*
* If we need to extend the journal get a few extra blocks
* while we're at it for efficiency's sake.
* Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
* moment, get_block can be called only for blocks inside i_size since
* page cache has been already dropped and writes are blocked by
* i_mutex. So we can safely drop the i_data_sem here.
*/
needed += 3;
err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
if (err <= 0)
return err;
err = ext4_truncate_restart_trans(handle, inode, needed);
if (err == 0)
err = -EAGAIN;
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
}
return err;
/*
 * Make sure 'handle' has at least 'check_cred' buffer credits and
 * 'revoke_cred' revoke credits. If not and the handle cannot simply be
 * extended, restart the transaction with 'restart_cred' credits. The
 * function drops i_data_sem when restarting the transaction and gets it
 * again after the restart attempt.
 *
 * The restart path does up_write() on i_data_sem, so the caller is expected
 * to hold it for writing; it is held again when this function returns.
 *
 * The function returns 0 on success, 1 if transaction had to be restarted,
 * and < 0 in case of fatal error.
 */
int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
int check_cred, int restart_cred,
int revoke_cred)
{
int ret;
/* Set to 1 by ext4_ext_trunc_restart_fn() once it has released i_data_sem */
int dropped = 0;
/*
 * The last argument is an expression that the macro evaluates only when a
 * restart is required; it discards preallocations and drops i_data_sem.
 */
ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
/* Re-take the semaphore whether or not the restart itself succeeded */
if (dropped)
down_write(&EXT4_I(inode)->i_data_sem);
return ret;
}
/*
......@@ -1840,7 +1852,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
* group descriptor to release the extent tree block. If we
* can't get the journal credits, give up.
*/
if (ext4_journal_extend(handle, 2))
if (ext4_journal_extend(handle, 2,
ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
return;
/*
......@@ -2727,7 +2740,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int err = 0, correct_index = 0;
int depth = ext_depth(inode), credits;
int depth = ext_depth(inode), credits, revoke_credits;
struct ext4_extent_header *eh;
ext4_lblk_t a, b;
unsigned num;
......@@ -2819,10 +2832,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
credits += (ext_depth(inode)) + 1;
}
credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err)
/*
* We may end up freeing some index blocks and data from the
* punched range. Note that partial clusters are accounted for
* by ext4_free_data_revoke_credits().
*/
revoke_credits =
ext4_free_metadata_revoke_credits(inode->i_sb,
ext_depth(inode)) +
ext4_free_data_revoke_credits(inode, b - a + 1);
err = ext4_datasem_ensure_credits(handle, inode, credits,
credits, revoke_credits);
if (err) {
if (err > 0)
err = -EAGAIN;
goto out;
}
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
......@@ -2948,7 +2974,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
ext_debug("truncate since %u to %u\n", start, end);
/* probably first extent we're gonna free will be last in block */
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
depth + 1,
ext4_free_metadata_revoke_credits(inode->i_sb, depth));
if (IS_ERR(handle))
return PTR_ERR(handle);
......@@ -5225,13 +5253,10 @@ ext4_access_path(handle_t *handle, struct inode *inode,
* descriptor) for each block group; assume two block
* groups
*/
if (handle->h_buffer_credits < 7) {
credits = ext4_writepage_trans_blocks(inode);
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
/* EAGAIN is success */
if (err && err != -EAGAIN)
return err;
}
credits = ext4_writepage_trans_blocks(inode);
err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
if (err < 0)
return err;
err = ext4_ext_get_access(handle, inode, path);
return err;
......
......@@ -927,7 +927,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
handle_type, nblocks,
0);
0, 0);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_std_error(sb, err);
......
......@@ -331,11 +331,14 @@ static int ext4_alloc_branch(handle_t *handle,
for (i = 0; i <= indirect_blks; i++) {
if (i == indirect_blks) {
new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
} else
} else {
ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
ar->inode, ar->goal,
ar->flags & EXT4_MB_DELALLOC_RESERVED,
NULL, &err);
/* Simplify error cleanup... */
branch[i+1].bh = NULL;
}
if (err) {
i--;
goto failed;
......@@ -377,18 +380,25 @@ static int ext4_alloc_branch(handle_t *handle,
}
return 0;
failed:
if (i == indirect_blks) {
/* Free data blocks */
ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
ar->len, 0);
i--;
}
for (; i >= 0; i--) {
/*
* We want to ext4_forget() only freshly allocated indirect
* blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
* buffer at branch[0].bh is indirect block / inode already
* existing before ext4_alloc_branch() was called.
* blocks. Buffer for new_blocks[i] is at branch[i+1].bh
* (buffer at branch[0].bh is indirect block / inode already
* existing before ext4_alloc_branch() was called). Also
* because blocks are freshly allocated, we don't need to
* revoke them which is why we don't set
* EXT4_FREE_BLOCKS_METADATA.
*/
if (i > 0 && i != indirect_blks && branch[i].bh)
ext4_forget(handle, 1, ar->inode, branch[i].bh,
branch[i].bh->b_blocknr);
ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
(i == indirect_blks) ? ar->len : 1, 0);
ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
new_blocks[i], 1,
branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
}
return err;
}
......@@ -689,27 +699,63 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
/*
 * Cleanup evaluated right before an indirect-truncate transaction restart.
 *
 * Marks @bh (when given) and the inode dirty in @handle, then discards
 * preallocations and releases i_data_sem, recording that in *@dropped so
 * the caller knows to take the semaphore again.
 *
 * Returns 0 on success. If dirtying fails, returns that error with
 * i_data_sem still held and *@dropped untouched.
 */
static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
				     struct buffer_head *bh, int *dropped)
{
	int ret = 0;

	if (bh) {
		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
		ret = ext4_handle_dirty_metadata(handle, inode, bh);
	}
	if (!ret)
		ret = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(ret))
		return ret;

	/*
	 * i_data_sem has to be released to avoid a deadlock with
	 * ext4_map_blocks. That is safe at this point: the page cache has
	 * already been dropped and writes are blocked by i_mutex, so
	 * get_block can only be called for blocks inside i_size.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;

	return 0;
}
/*
* Truncate transactions can be complex and absolutely huge. So we need to
* be able to restart the transaction at a convenient checkpoint to make
* sure we don't overflow the journal.
*
* Try to extend this transaction for the purposes of truncation. If
* extend fails, we need to propagate the failure up and restart the
* transaction in the top-level truncate loop. --sct
*
* Returns 0 if we managed to create more room. If we can't create more
* room, and the transaction must be restarted we return 1.
* extend fails, we restart transaction.
*/
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
static int ext4_ind_truncate_ensure_credits(handle_t *handle,
struct inode *inode,
struct buffer_head *bh,
int revoke_creds)
{
if (!ext4_handle_valid(handle))
return 0;
if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
return 0;
return 1;
int ret;
int dropped = 0;
ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
ext4_blocks_for_truncate(inode), revoke_creds,
ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
if (dropped)
down_write(&EXT4_I(inode)->i_data_sem);
if (ret <= 0)
return ret;
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
ret = ext4_journal_get_write_access(handle, bh);
if (unlikely(ret))
return ret;
}
return 0;
}
/*
......@@ -844,27 +890,10 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
return 1;
}
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, inode, bh);
if (unlikely(err))
goto out_err;
}
err = ext4_mark_inode_dirty(handle, inode);
if (unlikely(err))
goto out_err;
err = ext4_truncate_restart_trans(handle, inode,
ext4_blocks_for_truncate(inode));
if (unlikely(err))
goto out_err;
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
err = ext4_journal_get_write_access(handle, bh);
if (unlikely(err))
goto out_err;
}
}
err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
ext4_free_data_revoke_credits(inode, count));
if (err < 0)
goto out_err;
for (p = first; p < last; p++)
*p = 0;
......@@ -1047,11 +1076,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
*/
if (ext4_handle_is_aborted(handle))
return;
if (try_to_extend_transaction(handle, inode)) {
ext4_mark_inode_dirty(handle, inode);
ext4_truncate_restart_trans(handle, inode,
ext4_blocks_for_truncate(inode));
}
if (ext4_ind_truncate_ensure_credits(handle, inode,
NULL,
ext4_free_metadata_revoke_credits(
inode->i_sb, 1)) < 0)
return;
/*
* The forget flag here is critical because if
......
......@@ -163,32 +163,6 @@ int ext4_inode_is_fast_symlink(struct inode *inode)
(inode->i_size < EXT4_N_BLOCKS * 4);
}
/*
* Restart the transaction associated with *handle. This does a commit,
* so before we call here everything must be consistently dirtied against
* this transaction.
*/
int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
int nblocks)
{
int ret;
/*
* Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
* moment, get_block can be called only for blocks inside i_size since
* page cache has been already dropped and writes are blocked by
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle);
up_write(&EXT4_I(inode)->i_data_sem);
ret = ext4_journal_restart(handle, nblocks);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
return ret;
}
/*
* Called at the last iput() if i_nlink is zero.
*/
......@@ -196,7 +170,12 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
int extra_credits = 3;
/*
* Credits for final inode cleanup and freeing:
* sb + inode (ext4_orphan_del()), block bitmap, group descriptor
* (xattr block freeing), bitmap, group descriptor (inode freeing)
*/
int extra_credits = 6;
struct ext4_xattr_inode_array *ea_inode_array = NULL;
trace_ext4_evict_inode(inode);
......@@ -252,8 +231,12 @@ void ext4_evict_inode(struct inode *inode)
if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
/*
* Block bitmap, group descriptor, and inode are accounted in both
* ext4_blocks_for_truncate() and extra_credits. So subtract 3.
*/
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode)+extra_credits);
ext4_blocks_for_truncate(inode) + extra_credits - 3);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
......@@ -6009,9 +5992,8 @@ static int ext4_try_to_expand_extra_isize(struct inode *inode,
* If this is felt to be critical, then e2fsck should be run to
* force a large enough s_min_extra_isize.
*/
if (ext4_handle_valid(handle) &&
jbd2_journal_extend(handle,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0)
if (ext4_journal_extend(handle,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0)
return -ENOSPC;
if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
......
......@@ -50,29 +50,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
needed = ext4_ext_calc_credits_for_single_extent(inode,
lb->last_block - lb->first_block + 1, path);
/*
* Make sure the credit we accumalated is not really high
*/
if (needed && ext4_handle_has_enough_credits(handle,
EXT4_RESERVE_TRANS_BLOCKS)) {
up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
} else if (needed) {
retval = ext4_journal_extend(handle, needed);
if (retval) {
/*
* IF not able to extend the journal restart the journal
*/
up_write((&EXT4_I(inode)->i_data_sem));
retval = ext4_journal_restart(handle, needed);
down_write((&EXT4_I(inode)->i_data_sem));
if (retval)
goto err_out;
}
}
retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
if (retval < 0)
goto err_out;
retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
......@@ -196,42 +176,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
}
static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
int retval = 0, needed;
if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
/*
* We are freeing a blocks. During this we touch
* superblock, group descriptor and block bitmap.
* So allocate a credit of 3. We may update
* quota (user and group).
*/
needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
if (ext4_journal_extend(handle, needed) != 0)
retval = ext4_journal_restart(handle, needed);
return retval;
}
static int free_dind_blocks(handle_t *handle,
struct inode *inode, __le32 i_data)
{
int i;
__le32 *tmp_idata;
struct buffer_head *bh;
struct super_block *sb = inode->i_sb;
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
int err;
bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
bh = ext4_sb_bread(sb, le32_to_cpu(i_data), 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i]) {
extend_credit_for_blkdel(handle, inode);
err = ext4_journal_ensure_credits(handle,
EXT4_RESERVE_TRANS_BLOCKS,
ext4_free_metadata_revoke_credits(sb, 1));
if (err < 0) {
put_bh(bh);
return err;
}
ext4_free_blocks(handle, inode, NULL,
le32_to_cpu(tmp_idata[i]), 1,
EXT4_FREE_BLOCKS_METADATA |
......@@ -239,7 +207,10 @@ static int free_dind_blocks(handle_t *handle,
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
err = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
ext4_free_metadata_revoke_credits(sb, 1));
if (err < 0)
return err;
ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
......@@ -270,7 +241,10 @@ static int free_tind_blocks(handle_t *handle,
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
if (retval < 0)
return retval;
ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
......@@ -283,7 +257,11 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
/* ei->i_data[EXT4_IND_BLOCK] */
if (i_data[0]) {
extend_credit_for_blkdel(handle, inode);
retval = ext4_journal_ensure_credits(handle,
EXT4_RESERVE_TRANS_BLOCKS,
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
if (retval < 0)
return retval;
ext4_free_blocks(handle, inode, NULL,
le32_to_cpu(i_data[0]), 1,
EXT4_FREE_BLOCKS_METADATA |
......@@ -318,12 +296,9 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* One credit accounted for writing the
* i_data field of the original inode
*/
retval = ext4_journal_extend(handle, 1);
if (retval) {
retval = ext4_journal_restart(handle, 1);
if (retval)
goto err_out;
}
retval = ext4_journal_ensure_credits(handle, 1, 0);
if (retval < 0)
goto err_out;
i_data[0] = ei->i_data[EXT4_IND_BLOCK];
i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
......@@ -391,15 +366,20 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
ix = EXT_FIRST_INDEX(eh);
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
retval = free_ext_idx(handle, inode, ix);
if (retval)
break;
if (retval) {
put_bh(bh);
return retval;
}
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
if (retval < 0)
return retval;
ext4_free_blocks(handle, inode, NULL, block, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return retval;
return 0;
}
/*
......@@ -574,9 +554,9 @@ int ext4_ext_migrate(struct inode *inode)
}
/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
if (ext4_journal_extend(handle, 1) != 0)
ext4_journal_restart(handle, 1);
retval = ext4_journal_ensure_credits(handle, 1, 0);
if (retval < 0)
goto out_stop;
/*
* Mark the tmp_inode as of size zero
*/
......@@ -594,6 +574,7 @@ int ext4_ext_migrate(struct inode *inode)
/* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode);
out_stop:
ext4_journal_stop(handle);
out:
unlock_new_inode(tmp_inode);
......
......@@ -2547,18 +2547,29 @@ static void ext4_dec_count(handle_t *handle, struct inode *inode)
}
/*
* Add non-directory inode to a directory. On success, the inode reference is
* consumed by dentry instantiation. This is also indicated by clearing of
* *inodep pointer. On failure, the caller is responsible for dropping the
* inode reference in the safe context.
*/
static int ext4_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
struct dentry *dentry, struct inode **inodep)
{
struct inode *dir = d_inode(dentry->d_parent);
struct inode *inode = *inodep;
int err = ext4_add_entry(handle, dentry, inode);
if (!err) {
ext4_mark_inode_dirty(handle, inode);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
d_instantiate_new(dentry, inode);
*inodep = NULL;
return 0;
}
drop_nlink(inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
iput(inode);
return err;
}
......@@ -2592,12 +2603,12 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
ext4_handle_sync(handle);
err = ext4_add_nondir(handle, dentry, &inode);
}
if (handle)
ext4_journal_stop(handle);
if (!IS_ERR_OR_NULL(inode))
iput(inode);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
......@@ -2624,12 +2635,12 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
ext4_handle_sync(handle);
err = ext4_add_nondir(handle, dentry, &inode);
}
if (handle)
ext4_journal_stop(handle);
if (!IS_ERR_OR_NULL(inode))
iput(inode);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
......@@ -2779,10 +2790,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err) {
out_clear_inode:
clear_nlink(inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
iput(inode);
goto out_stop;
goto out_retry;
}
ext4_inc_count(handle, dir);
ext4_update_dx_flag(dir);
......@@ -2796,6 +2809,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_stop:
if (handle)
ext4_journal_stop(handle);
out_retry:
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
......@@ -3328,12 +3342,11 @@ static int ext4_symlink(struct inode *dir,
inode->i_size = disk_link.len - 1;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
ext4_handle_sync(handle);
err = ext4_add_nondir(handle, dentry, &inode);
if (handle)
ext4_journal_stop(handle);
if (inode)
iput(inode);
goto out_free_encrypted_link;
err_drop_inode:
......
......@@ -388,28 +388,10 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
return bh;
}
/*
* If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for block_bitmap modifications.
*/
static int extend_or_restart_transaction(handle_t *handle, int thresh)
static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits)
{
int err;
if (ext4_handle_has_enough_credits(handle, thresh))
return 0;
err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
if (err < 0)
return err;
if (err) {
err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
if (err)
return err;
}
return 0;
return ext4_journal_ensure_credits_fn(handle, credits,
EXT4_MAX_TRANS_DATA, 0, 0);
}
/*
......@@ -451,8 +433,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
continue;
}
err = extend_or_restart_transaction(handle, 1);
if (err)
err = ext4_resize_ensure_credits_batch(handle, 1);
if (err < 0)
return err;
bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
......@@ -544,8 +526,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
struct buffer_head *gdb;
ext4_debug("update backup group %#04llx\n", block);
err = extend_or_restart_transaction(handle, 1);
if (err)
err = ext4_resize_ensure_credits_batch(handle, 1);
if (err < 0)
goto out;
gdb = sb_getblk(sb, block);
......@@ -602,8 +584,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
/* Initialize block bitmap of the @group */
block = group_data[i].block_bitmap;
err = extend_or_restart_transaction(handle, 1);
if (err)
err = ext4_resize_ensure_credits_batch(handle, 1);
if (err < 0)
goto out;
bh = bclean(handle, sb, block);
......@@ -631,8 +613,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
/* Initialize inode bitmap of the @group */
block = group_data[i].inode_bitmap;
err = extend_or_restart_transaction(handle, 1);
if (err)
err = ext4_resize_ensure_credits_batch(handle, 1);
if (err < 0)
goto out;
/* Mark unused entries in inode bitmap used */
bh = bclean(handle, sb, block);
......@@ -1109,10 +1091,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
ext4_fsblk_t backup_block;
/* Out of journal space, and can't get more - abort - so sad */
if (ext4_handle_valid(handle) &&
handle->h_buffer_credits == 0 &&
ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
err = ext4_resize_ensure_credits_batch(handle, 1);
if (err < 0)
break;
if (meta_bg == 0)
......
......@@ -967,55 +967,6 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
return credits;
}
/*
 * Make sure @handle has at least @credits buffer credits, extending the
 * running transaction if possible and otherwise restarting the handle.
 *
 * @bh, @dirty and @block_csum describe an xattr block buffer the caller may
 * be in the middle of modifying: when @bh is non-NULL and @dirty is true the
 * buffer is written to the journal before the restart, and write access to
 * @bh is re-acquired after the restart.
 *
 * Returns 0 on success or the first non-zero error reported by the journal
 * helpers; each failure is also logged with ext4_warning().
 */
static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
int credits, struct buffer_head *bh,
bool dirty, bool block_csum)
{
int error;
/* Nothing to do when the handle is not valid. */
if (!ext4_handle_valid(handle))
return 0;
/* Already enough credits on the handle. */
if (handle->h_buffer_credits >= credits)
return 0;
/* First try to extend the handle by exactly the shortfall. */
error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
if (!error)
return 0;
if (error < 0) {
ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
return error;
}
/*
 * Positive return from the extend call: fall back to restarting the
 * handle. NOTE(review): presumably this means the transaction could not
 * be extended in place - confirm against jbd2_journal_extend().
 */
/* Push pending changes to @bh into the journal before the restart. */
if (bh && dirty) {
if (block_csum)
ext4_xattr_block_csum_set(inode, bh);
error = ext4_handle_dirty_metadata(handle, NULL, bh);
if (error) {
ext4_warning(inode->i_sb, "Handle metadata (error %d)",
error);
return error;
}
}
error = ext4_journal_restart(handle, credits);
if (error) {
ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
return error;
}
/* After the restart, get write access to @bh again for the caller. */
if (bh) {
error = ext4_journal_get_write_access(handle, bh);
if (error) {
ext4_warning(inode->i_sb,
"Get write access failed (error %d)",
error);
return error;
}
}
return 0;
}
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
......@@ -1149,6 +1100,24 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
return saved_err;
}
/*
 * Flush a modified xattr block buffer to the journal.
 *
 * Does nothing and returns 0 unless @bh is non-NULL and @dirty is true.
 * Otherwise the block checksum is refreshed first when @block_csum is set,
 * and @bh is then handed to ext4_handle_dirty_metadata().  A failure of that
 * call is logged against @inode's superblock and returned to the caller.
 */
static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode,
				 struct buffer_head *bh, bool block_csum, bool dirty)
{
	int ret;

	/* No buffer, or nothing changed in it: nothing to write out. */
	if (!bh || !dirty)
		return 0;

	if (block_csum)
		ext4_xattr_block_csum_set(inode, bh);

	ret = ext4_handle_dirty_metadata(handle, NULL, bh);
	if (ret)
		ext4_warning(inode->i_sb, "Handle metadata (error %d)",
			     ret);
	return ret;
}
static void
ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
struct buffer_head *bh,
......@@ -1185,13 +1154,24 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
continue;
}
err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
dirty, block_csum);
if (err) {
err = ext4_journal_ensure_credits_fn(handle, credits, credits,
ext4_free_metadata_revoke_credits(parent->i_sb, 1),
ext4_xattr_restart_fn(handle, parent, bh, block_csum,
dirty));
if (err < 0) {
ext4_warning_inode(ea_inode, "Ensure credits err=%d",
err);
continue;
}
if (err > 0) {
err = ext4_journal_get_write_access(handle, bh);
if (err) {
ext4_warning_inode(ea_inode,
"Re-get write access err=%d",
err);
continue;
}
}
err = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (err) {
......@@ -2335,7 +2315,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
flags & XATTR_CREATE);
brelse(bh);
if (!ext4_handle_has_enough_credits(handle, credits)) {
if (jbd2_handle_buffer_credits(handle) < credits) {
error = -ENOSPC;
goto cleanup;
}
......@@ -2862,11 +2842,9 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct inode *ea_inode;
int error;
error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
NULL /* bh */,
false /* dirty */,
false /* block_csum */);
if (error) {
error = ext4_journal_ensure_credits(handle, extra_credits,
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
if (error < 0) {
EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
goto cleanup;
}
......
......@@ -110,7 +110,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
int nblocks, space_left;
/* assert_spin_locked(&journal->j_state_lock); */
nblocks = jbd2_space_needed(journal);
nblocks = journal->j_max_transaction_buffers;
while (jbd2_log_space_left(journal) < nblocks) {
write_unlock(&journal->j_state_lock);
mutex_lock_io(&journal->j_checkpoint_mutex);
......
......@@ -560,8 +560,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_logging = jiffies;
stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
stats.run.rs_logging);
stats.run.rs_blocks =
atomic_read(&commit_transaction->t_outstanding_credits);
stats.run.rs_blocks = commit_transaction->t_nr_buffers;
stats.run.rs_blocks_logged = 0;
J_ASSERT(commit_transaction->t_nr_buffers <=
......@@ -642,8 +641,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/*
* start_this_handle() uses t_outstanding_credits to determine
* the free space in the log, but this counter is changed
* by jbd2_journal_next_log_block() also.
* the free space in the log.
*/
atomic_dec(&commit_transaction->t_outstanding_credits);
......@@ -727,7 +725,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
}
cond_resched();
stats.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
......@@ -814,6 +811,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
jbd2_unfile_log_bh(bh);
stats.run.rs_blocks_logged++;
/*
* The list contains temporary buffer heads created by
......@@ -859,6 +857,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
clear_buffer_jwrite(bh);
jbd2_unfile_log_bh(bh);
stats.run.rs_blocks_logged++;
__brelse(bh); /* One for getblk */
/* AKPM: bforget here */
}
......@@ -880,6 +879,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
}
if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
stats.run.rs_blocks_logged++;
if (jbd2_has_feature_async_commit(journal) &&
journal->j_flags & JBD2_BARRIER) {
blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
......@@ -888,6 +888,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
jbd2_journal_abort(journal, err);
WARN_ON_ONCE(
atomic_read(&commit_transaction->t_outstanding_credits) < 0);
/*
* Now disk caches for filesystem device are flushed so we are safe to
* erase checkpointed transactions from the log by updating journal
......
......@@ -840,6 +840,7 @@ jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
if (!bh)
return NULL;
atomic_dec(&transaction->t_outstanding_credits);
lock_buffer(bh);
memset(bh->b_data, 0, journal->j_blocksize);
header = (journal_header_t *)bh->b_data;
......@@ -1098,6 +1099,16 @@ static void jbd2_stats_proc_exit(journal_t *journal)
remove_proc_entry(journal->j_devname, proc_jbd2_stats);
}
/*
 * Smallest size, in bytes, that a single descriptor block tag can occupy.
 */
static int jbd2_min_tag_size(void)
{
	/*
	 * A tag that carries a 32-bit block number leaves the last four
	 * bytes of journal_block_tag_t unused.
	 */
	const int unused_tail_bytes = 4;

	return sizeof(journal_block_tag_t) - unused_tail_bytes;
}
/*
* Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing
......@@ -1156,7 +1167,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
journal->j_maxlen = len;
n = journal->j_blocksize / sizeof(journal_block_tag_t);
/* We need enough buffers to write out full descriptor block. */
n = journal->j_blocksize / jbd2_min_tag_size();
journal->j_wbufsize = n;
journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
GFP_KERNEL);
......@@ -1488,6 +1500,21 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
/*
 * Compute how many revoke records fit into one journal block.
 *
 * The usable space is the journal block size minus the revoke header and,
 * when v2/v3 checksums are in use, minus the block tail.  Each record takes
 * 8 bytes on a 64-bit journal and 4 bytes otherwise.
 */
static int journal_revoke_records_per_block(journal_t *journal)
{
	int avail = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
	int rec_bytes = jbd2_has_feature_64bit(journal) ? 8 : 4;

	if (jbd2_journal_has_csum_v2or3(journal))
		avail -= sizeof(struct jbd2_journal_block_tail);

	return avail / rec_bytes;
}
/*
* Read the superblock for a given journal, performing initial
* validation of the format.
......@@ -1596,6 +1623,8 @@ static int journal_get_superblock(journal_t *journal)
sizeof(sb->s_uuid));
}
journal->j_revoke_records_per_block =
journal_revoke_records_per_block(journal);
set_buffer_verified(bh);
return 0;
......@@ -1916,6 +1945,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
unlock_buffer(journal->j_sb_buffer);
journal->j_revoke_records_per_block =
journal_revoke_records_per_block(journal);
return 1;
#undef COMPAT_FEATURE_ON
......@@ -1946,6 +1977,8 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
sb->s_feature_compat &= ~cpu_to_be32(compat);
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
journal->j_revoke_records_per_block =
journal_revoke_records_per_block(journal);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
......
......@@ -371,6 +371,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
}
#endif
if (WARN_ON_ONCE(handle->h_revoke_credits <= 0)) {
if (!bh_in)
brelse(bh);
return -EIO;
}
/* We really ought not ever to revoke twice in a row without
first having the revoke cancelled: it's illegal to free a
block twice without allocating it in between! */
......@@ -391,6 +396,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
__brelse(bh);
}
}
handle->h_revoke_credits--;
jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
err = insert_revoke_hash(journal, blocknr,
......
This diff is collapsed.
......@@ -2288,9 +2288,9 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
int ret = 0;
int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
if (handle->h_buffer_credits < credits)
if (jbd2_handle_buffer_credits(handle) < credits)
ret = ocfs2_extend_trans(handle,
credits - handle->h_buffer_credits);
credits - jbd2_handle_buffer_credits(handle));
return ret;
}
......@@ -2367,7 +2367,7 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
struct ocfs2_path *right_path,
struct ocfs2_path **ret_left_path)
{
int ret, start, orig_credits = handle->h_buffer_credits;
int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
u32 cpos;
struct ocfs2_path *left_path = NULL;
struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
......@@ -3148,7 +3148,7 @@ static int ocfs2_rotate_tree_left(handle_t *handle,
struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc)
{
int ret, orig_credits = handle->h_buffer_credits;
int ret, orig_credits = jbd2_handle_buffer_credits(handle);
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
......@@ -3386,8 +3386,8 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
handle->h_buffer_credits,
right_path);
jbd2_handle_buffer_credits(handle),
right_path);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -3548,8 +3548,8 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
handle->h_buffer_credits,
left_path);
jbd2_handle_buffer_credits(handle),
left_path);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -3623,7 +3623,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
le16_to_cpu(el->l_next_free_rec) == 1) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
right_path);
if (ret) {
mlog_errno(ret);
......@@ -3669,7 +3669,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
......@@ -3725,7 +3725,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
......@@ -3755,7 +3755,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
......@@ -3799,7 +3799,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
if (ctxt->c_split_covers_rec) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
......@@ -5358,7 +5358,7 @@ static int ocfs2_truncate_rec(handle_t *handle,
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
/* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
......@@ -5427,8 +5427,8 @@ static int ocfs2_truncate_rec(handle_t *handle,
}
ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits,
path);
jbd2_handle_buffer_credits(handle),
path);
if (ret) {
mlog_errno(ret);
goto out;
......
......@@ -419,14 +419,14 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
if (!nblocks)
return 0;
old_nblocks = handle->h_buffer_credits;
old_nblocks = jbd2_handle_buffer_credits(handle);
trace_ocfs2_extend_trans(old_nblocks, nblocks);
#ifdef CONFIG_OCFS2_DEBUG_FS
status = 1;
#else
status = jbd2_journal_extend(handle, nblocks);
status = jbd2_journal_extend(handle, nblocks, 0);
if (status < 0) {
mlog_errno(status);
goto bail;
......@@ -460,13 +460,13 @@ int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
BUG_ON(!handle);
old_nblks = handle->h_buffer_credits;
old_nblks = jbd2_handle_buffer_credits(handle);
trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
if (old_nblks < thresh)
return 0;
status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
if (status < 0) {
mlog_errno(status);
goto bail;
......
......@@ -456,7 +456,9 @@ struct jbd2_revoke_table_s;
* @h_transaction: Which compound transaction is this update a part of?
* @h_journal: Which journal handle belongs to - used iff h_reserved set.
* @h_rsv_handle: Handle reserved for finishing the logical operation.
* @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
* @h_total_credits: Number of remaining buffers we are allowed to add to
journal. These are dirty buffers and revoke descriptor blocks.
* @h_revoke_credits: Number of remaining revoke records available for handle
* @h_ref: Reference count on this handle.
* @h_err: Field for caller's use to track errors through large fs operations.
* @h_sync: Flag for sync-on-close.
......@@ -466,7 +468,8 @@ struct jbd2_revoke_table_s;
* @h_type: For handle statistics.
* @h_line_no: For handle statistics.
* @h_start_jiffies: Handle Start time.
* @h_requested_credits: Holds @h_buffer_credits after handle is started.
* @h_requested_credits: Holds @h_total_credits after handle is started.
* @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started.
* @saved_alloc_context: Saved context while transaction is open.
**/
......@@ -483,7 +486,9 @@ struct jbd2_journal_handle
};
handle_t *h_rsv_handle;
int h_buffer_credits;
int h_total_credits;
int h_revoke_credits;
int h_revoke_credits_requested;
int h_ref;
int h_err;
......@@ -660,11 +665,24 @@ struct transaction_s
atomic_t t_updates;
/*
* Number of buffers reserved for use by all handles in this transaction
* handle but not yet modified. [none]
* Number of blocks reserved for this transaction in the journal.
* This is including all credits reserved when starting transaction
* handles as well as all journal descriptor blocks needed for this
* transaction. [none]
*/
atomic_t t_outstanding_credits;
/*
* Number of revoke records for this transaction added by already
* stopped handles. [none]
*/
atomic_t t_outstanding_revokes;
/*
* How many handles used this transaction? [none]
*/
atomic_t t_handle_count;
/*
* Forward and backward links for the circular list of all transactions
* awaiting checkpoint. [j_list_lock]
......@@ -682,11 +700,6 @@ struct transaction_s
*/
ktime_t t_start_time;
/*
* How many handles used this transaction? [none]
*/
atomic_t t_handle_count;
/*
* This transaction is being forced and some process is
* waiting for it to finish.
......@@ -1003,6 +1016,13 @@ struct journal_s
*/
int j_max_transaction_buffers;
/**
* @j_revoke_records_per_block:
*
* Number of revoke records that fit in one descriptor block.
*/
int j_revoke_records_per_block;
/**
* @j_commit_interval:
*
......@@ -1337,14 +1357,16 @@ static inline handle_t *journal_current_handle(void)
extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
gfp_t gfp_mask, unsigned int type,
unsigned int line_no);
int revoke_records, gfp_t gfp_mask,
unsigned int type, unsigned int line_no);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
extern int jbd2__journal_restart(handle_t *, int nblocks,
int revoke_records, gfp_t gfp_mask);
extern int jbd2_journal_start_reserved(handle_t *handle,
unsigned int type, unsigned int line_no);
extern void jbd2_journal_free_reserved(handle_t *handle);
extern int jbd2_journal_extend (handle_t *, int nblocks);
extern int jbd2_journal_extend(handle_t *handle, int nblocks,
int revoke_records);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
......@@ -1539,38 +1561,19 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
return journal->j_chksum_driver != NULL;
}
/*
* We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
* transaction control blocks.
*/
#define JBD2_CONTROL_BLOCKS_SHIFT 5
/*
 * Minimum number of free journal blocks required before a new transaction
 * may be started: the maximum number of transaction buffers plus that value
 * shifted right by JBD2_CONTROL_BLOCKS_SHIFT for control blocks.
 * Must be called under j_state_lock.
 */
static inline int jbd2_space_needed(journal_t *journal)
{
	const int max_bufs = journal->j_max_transaction_buffers;
	const int control_blocks = max_bufs >> JBD2_CONTROL_BLOCKS_SHIFT;

	return max_bufs + control_blocks;
}
/*
* Return number of free blocks in the log. Must be called under j_state_lock.
*/
static inline unsigned long jbd2_log_space_left(journal_t *journal)
{
/* Allow for rounding errors */
unsigned long free = journal->j_free - 32;
long free = journal->j_free - 32;
if (journal->j_committing_transaction) {
unsigned long committing = atomic_read(&journal->
j_committing_transaction->t_outstanding_credits);
/* Transaction + control blocks */
free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT);
free -= atomic_read(&journal->
j_committing_transaction->t_outstanding_credits);
}
return free;
return max_t(long, free, 0);
}
/*
......@@ -1624,6 +1627,16 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
return tid;
}
/*
 * Return the number of credits on @handle that remain available for
 * buffers: the handle's total credits minus the blocks needed to hold the
 * revoke records the handle requested (rounded up to whole blocks using
 * the journal's j_revoke_records_per_block).
 */
static inline int jbd2_handle_buffer_credits(handle_t *handle)
{
	journal_t *journal;

	/*
	 * Per the handle's kernel-doc, h_journal is the member in use iff
	 * h_reserved is set; in that case h_transaction must not be
	 * dereferenced to find the journal.
	 */
	if (!handle->h_reserved)
		journal = handle->h_transaction->t_journal;
	else
		journal = handle->h_journal;

	return handle->h_total_credits -
		DIV_ROUND_UP(handle->h_revoke_credits_requested,
			     journal->j_revoke_records_per_block);
}
#ifdef __KERNEL__
#define buffer_trace_init(bh) do {} while (0)
......
......@@ -1746,15 +1746,16 @@ TRACE_EVENT(ext4_load_inode,
TRACE_EVENT(ext4_journal_start,
TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks,
unsigned long IP),
int revoke_creds, unsigned long IP),
TP_ARGS(sb, blocks, rsv_blocks, IP),
TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, IP),
TP_STRUCT__entry(
__field( dev_t, dev )
__field(unsigned long, ip )
__field( int, blocks )
__field( int, rsv_blocks )
__field( int, revoke_creds )
),
TP_fast_assign(
......@@ -1762,11 +1763,13 @@ TRACE_EVENT(ext4_journal_start,
__entry->ip = IP;
__entry->blocks = blocks;
__entry->rsv_blocks = rsv_blocks;
__entry->revoke_creds = revoke_creds;
),
TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blocks, __entry->rsv_blocks, (void *)__entry->ip)
TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d, "
"caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blocks, __entry->rsv_blocks, __entry->revoke_creds,
(void *)__entry->ip)
);
TRACE_EVENT(ext4_journal_start_reserved,
......
......@@ -133,7 +133,7 @@ TRACE_EVENT(jbd2_submit_inode_data,
(unsigned long) __entry->ino)
);
TRACE_EVENT(jbd2_handle_start,
DECLARE_EVENT_CLASS(jbd2_handle_start_class,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks),
......@@ -161,6 +161,20 @@ TRACE_EVENT(jbd2_handle_start,
__entry->type, __entry->line_no, __entry->requested_blocks)
);
/*
 * jbd2_handle_start tracepoint, instantiated from jbd2_handle_start_class;
 * it takes the same prototype and arguments as the class.
 */
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
/*
 * jbd2_handle_restart tracepoint, a second instance of
 * jbd2_handle_start_class with the same prototype and arguments.
 */
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
TRACE_EVENT(jbd2_handle_extend,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int buffer_credits,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment