Commit f5113eff authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o

jbd2: don't create journal_head for temporary journal buffers

When writing metadata to the journal, we create temporary buffer heads
for that task.  We also attach journal heads to these buffer heads but
the only purpose of the journal heads is to keep buffers linked in
transaction's BJ_IO list.  We remove the need for journal heads by
reusing buffer_head's b_assoc_buffers list for that purpose.  Also
since BJ_IO list is just a temporary list for transaction commit, we
use a private list in jbd2_journal_commit_transaction() for that thus
removing BJ_IO list from transaction completely.
Reviewed-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 97a851ed
...@@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact ...@@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_state == T_FINISHED); J_ASSERT(transaction->t_state == T_FINISHED);
J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_buffers == NULL);
J_ASSERT(transaction->t_forget == NULL); J_ASSERT(transaction->t_forget == NULL);
J_ASSERT(transaction->t_iobuf_list == NULL);
J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_log_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL);
......
...@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
{ {
struct transaction_stats_s stats; struct transaction_stats_s stats;
transaction_t *commit_transaction; transaction_t *commit_transaction;
struct journal_head *jh, *new_jh, *descriptor; struct journal_head *jh, *descriptor;
struct buffer_head **wbuf = journal->j_wbuf; struct buffer_head **wbuf = journal->j_wbuf;
int bufs; int bufs;
int flags; int flags;
...@@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tid_t first_tid; tid_t first_tid;
int update_tail; int update_tail;
int csum_size = 0; int csum_size = 0;
LIST_HEAD(io_bufs);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
csum_size = sizeof(struct jbd2_journal_block_tail); csum_size = sizeof(struct jbd2_journal_block_tail);
...@@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Bump b_count to prevent truncate from stumbling over /* Bump b_count to prevent truncate from stumbling over
the shadowed buffer! @@@ This can go if we ever get the shadowed buffer! @@@ This can go if we ever get
rid of the BJ_IO/BJ_Shadow pairing of buffers. */ rid of the shadow pairing of buffers. */
atomic_inc(&jh2bh(jh)->b_count); atomic_inc(&jh2bh(jh)->b_count);
/* Make a temporary IO buffer with which to write it out
(this will requeue both the metadata buffer and the
temporary IO buffer). new_bh goes on BJ_IO*/
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
/* /*
* akpm: jbd2_journal_write_metadata_buffer() sets * Make a temporary IO buffer with which to write it out
* new_bh->b_transaction to commit_transaction. * (this will requeue the metadata buffer to BJ_Shadow).
* We need to clean this up before we release new_bh
* (which is of type BJ_IO)
*/ */
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
JBUFFER_TRACE(jh, "ph3: write metadata"); JBUFFER_TRACE(jh, "ph3: write metadata");
flags = jbd2_journal_write_metadata_buffer(commit_transaction, flags = jbd2_journal_write_metadata_buffer(commit_transaction,
jh, &new_jh, blocknr); jh, &wbuf[bufs], blocknr);
if (flags < 0) { if (flags < 0) {
jbd2_journal_abort(journal, flags); jbd2_journal_abort(journal, flags);
continue; continue;
} }
set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
wbuf[bufs++] = jh2bh(new_jh);
/* Record the new block's tag in the current descriptor /* Record the new block's tag in the current descriptor
buffer */ buffer */
...@@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag = (journal_block_tag_t *) tagp; tag = (journal_block_tag_t *) tagp;
write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
tag->t_flags = cpu_to_be16(tag_flag); tag->t_flags = cpu_to_be16(tag_flag);
jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
commit_transaction->t_tid); commit_transaction->t_tid);
tagp += tag_bytes; tagp += tag_bytes;
space_left -= tag_bytes; space_left -= tag_bytes;
bufs++;
if (first_tag) { if (first_tag) {
memcpy (tagp, journal->j_uuid, 16); memcpy (tagp, journal->j_uuid, 16);
...@@ -810,7 +805,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -810,7 +805,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
the log. Before we can commit it, wait for the IO so far to the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the complete. Control buffers being written are on the
transaction's t_log_list queue, and metadata buffers are on transaction's t_log_list queue, and metadata buffers are on
the t_iobuf_list queue. the io_bufs list.
Wait for the buffers in reverse order. That way we are Wait for the buffers in reverse order. That way we are
less likely to be woken up until all IOs have completed, and less likely to be woken up until all IOs have completed, and
...@@ -819,46 +814,31 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -819,46 +814,31 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD2: commit phase 3\n"); jbd_debug(3, "JBD2: commit phase 3\n");
/* while (!list_empty(&io_bufs)) {
* akpm: these are BJ_IO, and j_list_lock is not needed. struct buffer_head *bh = list_entry(io_bufs.prev,
* See __journal_try_to_free_buffer. struct buffer_head,
*/ b_assoc_buffers);
wait_for_iobuf:
while (commit_transaction->t_iobuf_list != NULL) {
struct buffer_head *bh;
jh = commit_transaction->t_iobuf_list->b_tprev; wait_on_buffer(bh);
bh = jh2bh(jh); cond_resched();
if (buffer_locked(bh)) {
wait_on_buffer(bh);
goto wait_for_iobuf;
}
if (cond_resched())
goto wait_for_iobuf;
if (unlikely(!buffer_uptodate(bh))) if (unlikely(!buffer_uptodate(bh)))
err = -EIO; err = -EIO;
jbd2_unfile_log_bh(bh);
clear_buffer_jwrite(bh);
JBUFFER_TRACE(jh, "ph4: unfile after journal write");
jbd2_journal_unfile_buffer(journal, jh);
/* /*
* ->t_iobuf_list should contain only dummy buffer_heads * The list contains temporary buffer heads created by
* which were created by jbd2_journal_write_metadata_buffer(). * jbd2_journal_write_metadata_buffer().
*/ */
BUFFER_TRACE(bh, "dumping temporary bh"); BUFFER_TRACE(bh, "dumping temporary bh");
jbd2_journal_put_journal_head(jh);
__brelse(bh); __brelse(bh);
J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
free_buffer_head(bh); free_buffer_head(bh);
/* We also have to unlock and free the corresponding /* We also have to refile the corresponding shadowed buffer */
shadowed buffer */
jh = commit_transaction->t_shadow_list->b_tprev; jh = commit_transaction->t_shadow_list->b_tprev;
bh = jh2bh(jh); bh = jh2bh(jh);
clear_bit(BH_JWrite, &bh->b_state); clear_buffer_jwrite(bh);
J_ASSERT_BH(bh, buffer_jbddirty(bh)); J_ASSERT_BH(bh, buffer_jbddirty(bh));
/* The metadata is now released for reuse, but we need /* The metadata is now released for reuse, but we need
...@@ -953,7 +933,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -953,7 +933,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(list_empty(&commit_transaction->t_inode_list)); J_ASSERT(list_empty(&commit_transaction->t_inode_list));
J_ASSERT(commit_transaction->t_buffers == NULL); J_ASSERT(commit_transaction->t_buffers == NULL);
J_ASSERT(commit_transaction->t_checkpoint_list == NULL); J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
J_ASSERT(commit_transaction->t_iobuf_list == NULL);
J_ASSERT(commit_transaction->t_shadow_list == NULL); J_ASSERT(commit_transaction->t_shadow_list == NULL);
J_ASSERT(commit_transaction->t_log_list == NULL); J_ASSERT(commit_transaction->t_log_list == NULL);
......
...@@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal) ...@@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal)
* *
* If the source buffer has already been modified by a new transaction * If the source buffer has already been modified by a new transaction
* since we took the last commit snapshot, we use the frozen copy of * since we took the last commit snapshot, we use the frozen copy of
* that data for IO. If we end up using the existing buffer_head's data * that data for IO. If we end up using the existing buffer_head's data
* for the write, then we *have* to lock the buffer to prevent anyone * for the write, then we have to make sure nobody modifies it while the
* else from using and possibly modifying it while the IO is in * IO is in progress. do_get_write_access() handles this.
* progress.
* *
* The function returns a pointer to the buffer_heads to be used for IO. * The function returns a pointer to the buffer_head to be used for IO.
* *
* We assume that the journal has already been locked in this function.
* *
* Return value: * Return value:
* <0: Error * <0: Error
...@@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal) ...@@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal)
int jbd2_journal_write_metadata_buffer(transaction_t *transaction, int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct journal_head *jh_in, struct journal_head *jh_in,
struct journal_head **jh_out, struct buffer_head **bh_out,
unsigned long long blocknr) sector_t blocknr)
{ {
int need_copy_out = 0; int need_copy_out = 0;
int done_copy_out = 0; int done_copy_out = 0;
int do_escape = 0; int do_escape = 0;
char *mapped_data; char *mapped_data;
struct buffer_head *new_bh; struct buffer_head *new_bh;
struct journal_head *new_jh;
struct page *new_page; struct page *new_page;
unsigned int new_offset; unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in); struct buffer_head *bh_in = jh2bh(jh_in);
...@@ -368,14 +365,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, ...@@ -368,14 +365,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
/* keep subsequent assertions sane */ /* keep subsequent assertions sane */
atomic_set(&new_bh->b_count, 1); atomic_set(&new_bh->b_count, 1);
new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
jbd_lock_bh_state(bh_in);
repeat:
/* /*
* If a new transaction has already done a buffer copy-out, then * If a new transaction has already done a buffer copy-out, then
* we use that version of the data for the commit. * we use that version of the data for the commit.
*/ */
jbd_lock_bh_state(bh_in);
repeat:
if (jh_in->b_frozen_data) { if (jh_in->b_frozen_data) {
done_copy_out = 1; done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data); new_page = virt_to_page(jh_in->b_frozen_data);
...@@ -415,7 +411,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, ...@@ -415,7 +411,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
jbd_unlock_bh_state(bh_in); jbd_unlock_bh_state(bh_in);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
if (!tmp) { if (!tmp) {
jbd2_journal_put_journal_head(new_jh); brelse(new_bh);
return -ENOMEM; return -ENOMEM;
} }
jbd_lock_bh_state(bh_in); jbd_lock_bh_state(bh_in);
...@@ -426,7 +422,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, ...@@ -426,7 +422,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
jh_in->b_frozen_data = tmp; jh_in->b_frozen_data = tmp;
mapped_data = kmap_atomic(new_page); mapped_data = kmap_atomic(new_page);
memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
kunmap_atomic(mapped_data); kunmap_atomic(mapped_data);
new_page = virt_to_page(tmp); new_page = virt_to_page(tmp);
...@@ -452,14 +448,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, ...@@ -452,14 +448,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
} }
set_bh_page(new_bh, new_page, new_offset); set_bh_page(new_bh, new_page, new_offset);
new_jh->b_transaction = NULL; new_bh->b_size = bh_in->b_size;
new_bh->b_size = jh2bh(jh_in)->b_size; new_bh->b_bdev = journal->j_dev;
new_bh->b_bdev = transaction->t_journal->j_dev;
new_bh->b_blocknr = blocknr; new_bh->b_blocknr = blocknr;
set_buffer_mapped(new_bh); set_buffer_mapped(new_bh);
set_buffer_dirty(new_bh); set_buffer_dirty(new_bh);
*jh_out = new_jh; *bh_out = new_bh;
/* /*
* The to-be-written buffer needs to get moved to the io queue, * The to-be-written buffer needs to get moved to the io queue,
...@@ -472,9 +467,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, ...@@ -472,9 +467,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh_in); jbd_unlock_bh_state(bh_in);
JBUFFER_TRACE(new_jh, "file as BJ_IO");
jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
return do_escape | (done_copy_out << 1); return do_escape | (done_copy_out << 1);
} }
......
...@@ -1601,10 +1601,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) ...@@ -1601,10 +1601,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
* Remove a buffer from the appropriate transaction list. * Remove a buffer from the appropriate transaction list.
* *
* Note that this function can *change* the value of * Note that this function can *change* the value of
* bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
* t_log_list or t_reserved_list. If the caller is holding onto a copy of one * t_reserved_list. If the caller is holding onto a copy of one of these
* of these pointers, it could go bad. Generally the caller needs to re-read * pointers, it could go bad. Generally the caller needs to re-read the
* the pointer from the transaction_t. * pointer from the transaction_t.
* *
* Called under j_list_lock. * Called under j_list_lock.
*/ */
...@@ -1634,9 +1634,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) ...@@ -1634,9 +1634,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
case BJ_Forget: case BJ_Forget:
list = &transaction->t_forget; list = &transaction->t_forget;
break; break;
case BJ_IO:
list = &transaction->t_iobuf_list;
break;
case BJ_Shadow: case BJ_Shadow:
list = &transaction->t_shadow_list; list = &transaction->t_shadow_list;
break; break;
...@@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, ...@@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
case BJ_Forget: case BJ_Forget:
list = &transaction->t_forget; list = &transaction->t_forget;
break; break;
case BJ_IO:
list = &transaction->t_iobuf_list;
break;
case BJ_Shadow: case BJ_Shadow:
list = &transaction->t_shadow_list; list = &transaction->t_shadow_list;
break; break;
......
...@@ -523,12 +523,6 @@ struct transaction_s ...@@ -523,12 +523,6 @@ struct transaction_s
*/ */
struct journal_head *t_checkpoint_io_list; struct journal_head *t_checkpoint_io_list;
/*
* Doubly-linked circular list of temporary buffers currently undergoing
* IO in the log [j_list_lock]
*/
struct journal_head *t_iobuf_list;
/* /*
* Doubly-linked circular list of metadata buffers being shadowed by log * Doubly-linked circular list of metadata buffers being shadowed by log
* IO. The IO buffers on the iobuf list and the shadow buffers on this * IO. The IO buffers on the iobuf list and the shadow buffers on this
...@@ -991,6 +985,14 @@ extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, i ...@@ -991,6 +985,14 @@ extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, i
extern void __journal_free_buffer(struct journal_head *bh); extern void __journal_free_buffer(struct journal_head *bh);
extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
extern void __journal_clean_data_list(transaction_t *transaction); extern void __journal_clean_data_list(transaction_t *transaction);
static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
{
list_add_tail(&bh->b_assoc_buffers, head);
}
static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
{
list_del_init(&bh->b_assoc_buffers);
}
/* Log buffer allocation */ /* Log buffer allocation */
extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *);
...@@ -1039,11 +1041,10 @@ extern void jbd2_buffer_abort_trigger(struct journal_head *jh, ...@@ -1039,11 +1041,10 @@ extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
struct jbd2_buffer_trigger_type *triggers); struct jbd2_buffer_trigger_type *triggers);
/* Buffer IO */ /* Buffer IO */
extern int extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in,
struct journal_head *jh_in, struct buffer_head **bh_out,
struct journal_head **jh_out, sector_t blocknr);
unsigned long long blocknr);
/* Transaction locking */ /* Transaction locking */
extern void __wait_on_journal (journal_t *); extern void __wait_on_journal (journal_t *);
...@@ -1286,11 +1287,10 @@ static inline int jbd_space_needed(journal_t *journal) ...@@ -1286,11 +1287,10 @@ static inline int jbd_space_needed(journal_t *journal)
#define BJ_None 0 /* Not journaled */ #define BJ_None 0 /* Not journaled */
#define BJ_Metadata 1 /* Normal journaled metadata */ #define BJ_Metadata 1 /* Normal journaled metadata */
#define BJ_Forget 2 /* Buffer superseded by this transaction */ #define BJ_Forget 2 /* Buffer superseded by this transaction */
#define BJ_IO 3 /* Buffer is for temporary IO use */ #define BJ_Shadow 3 /* Buffer contents being shadowed to the log */
#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */ #define BJ_LogCtl 4 /* Buffer contains log descriptors */
#define BJ_LogCtl 5 /* Buffer contains log descriptors */ #define BJ_Reserved 5 /* Buffer is reserved for access by journal */
#define BJ_Reserved 6 /* Buffer is reserved for access by journal */ #define BJ_Types 6
#define BJ_Types 7
extern int jbd_blocks_per_page(struct inode *inode); extern int jbd_blocks_per_page(struct inode *inode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment