Commit e821ceb2 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] JBD: implement b_transaction locking rules

Go through all uses of b_transaction and implement the rules.

Fairly straightforward.
parent b07da5e5
...@@ -50,6 +50,7 @@ static inline void __buffer_unlink(struct journal_head *jh) ...@@ -50,6 +50,7 @@ static inline void __buffer_unlink(struct journal_head *jh)
* Try to release a checkpointed buffer from its transaction. * Try to release a checkpointed buffer from its transaction.
* Returns 1 if we released it. * Returns 1 if we released it.
* Requires journal_datalist_lock * Requires journal_datalist_lock
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/ */
static int __try_to_free_cp_buf(struct journal_head *jh) static int __try_to_free_cp_buf(struct journal_head *jh)
{ {
...@@ -59,10 +60,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh) ...@@ -59,10 +60,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list"); JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh); journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release"); BUFFER_TRACE(bh, "release");
__brelse(bh); __brelse(bh);
ret = 1; ret = 1;
} else {
jbd_unlock_bh_state(bh);
} }
return ret; return ret;
} }
...@@ -92,6 +96,20 @@ void log_wait_for_space(journal_t *journal, int nblocks) ...@@ -92,6 +96,20 @@ void log_wait_for_space(journal_t *journal, int nblocks)
} }
} }
/*
 * We were unable to perform jbd_trylock_bh_state() inside
 * journal_datalist_lock. The caller must restart a list walk. Wait for
 * someone else to run jbd_unlock_bh_state().
 *
 * Called with journal_datalist_lock held; drops it before sleeping on the
 * bh_state lock. The bh reference taken here keeps the buffer_head alive
 * across the unlocked window.
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
{
/* Pin the buffer so it cannot be freed once we drop the list lock. */
get_bh(bh);
/* Must not sleep on bh_state while holding the spinning list lock. */
spin_unlock(&journal_datalist_lock);
/* Block until the current holder releases bh_state, then let go at
 * once — we only wanted to wait, not to keep the lock. */
jbd_lock_bh_state(bh);
jbd_unlock_bh_state(bh);
put_bh(bh);
}
/* /*
* Clean up a transaction's checkpoint list. * Clean up a transaction's checkpoint list.
* *
...@@ -131,12 +149,21 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) ...@@ -131,12 +149,21 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
__brelse(bh); __brelse(bh);
goto out_return_1; goto out_return_1;
} }
/*
* This is foul
*/
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
goto out_return_1;
}
if (jh->b_transaction != NULL) { if (jh->b_transaction != NULL) {
transaction_t *transaction = jh->b_transaction; transaction_t *transaction = jh->b_transaction;
tid_t tid = transaction->t_tid; tid_t tid = transaction->t_tid;
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
log_start_commit(journal, transaction); log_start_commit(journal, transaction);
unlock_journal(journal); unlock_journal(journal);
log_wait_commit(journal, tid); log_wait_commit(journal, tid);
...@@ -156,11 +183,13 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) ...@@ -156,11 +183,13 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
BUFFER_TRACE(bh, "remove from checkpoint"); BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh); journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
ret = 1; ret = 1;
} else {
jbd_unlock_bh_state(bh);
} }
jh = next_jh; jh = next_jh;
} while (jh != last_jh); } while (jh != last_jh);
...@@ -197,6 +226,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count) ...@@ -197,6 +226,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count)
* scan of the checkpoint list. * scan of the checkpoint list.
* *
* Called with journal_datalist_lock held. * Called with journal_datalist_lock held.
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/ */
static int __flush_buffer(journal_t *journal, struct journal_head *jh, static int __flush_buffer(journal_t *journal, struct journal_head *jh,
struct buffer_head **bhs, int *batch_count, struct buffer_head **bhs, int *batch_count,
...@@ -216,10 +246,11 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, ...@@ -216,10 +246,11 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
* disk, as that would break recoverability. * disk, as that would break recoverability.
*/ */
BUFFER_TRACE(bh, "queue"); BUFFER_TRACE(bh, "queue");
atomic_inc(&bh->b_count); get_bh(bh);
J_ASSERT_BH(bh, !test_bit(BH_JWrite, &bh->b_state)); J_ASSERT_BH(bh, !buffer_jwrite(bh));
set_bit(BH_JWrite, &bh->b_state); set_buffer_jwrite(bh);
bhs[*batch_count] = bh; bhs[*batch_count] = bh;
jbd_unlock_bh_state(bh);
(*batch_count)++; (*batch_count)++;
if (*batch_count == NR_BATCH) { if (*batch_count == NR_BATCH) {
__flush_batch(bhs, batch_count); __flush_batch(bhs, batch_count);
...@@ -302,8 +333,16 @@ int log_do_checkpoint (journal_t *journal, int nblocks) ...@@ -302,8 +333,16 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
last_jh = jh->b_cpprev; last_jh = jh->b_cpprev;
next_jh = jh; next_jh = jh;
do { do {
struct buffer_head *bh;
jh = next_jh; jh = next_jh;
next_jh = jh->b_cpnext; next_jh = jh->b_cpnext;
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
spin_lock(&journal_datalist_lock);
break;
}
retry = __flush_buffer(journal, jh, bhs, &batch_count, retry = __flush_buffer(journal, jh, bhs, &batch_count,
&drop_count); &drop_count);
} while (jh != last_jh && !retry); } while (jh != last_jh && !retry);
...@@ -439,10 +478,13 @@ int __journal_clean_checkpoint_list(journal_t *journal) ...@@ -439,10 +478,13 @@ int __journal_clean_checkpoint_list(journal_t *journal)
if (jh) { if (jh) {
struct journal_head *last_jh = jh->b_cpprev; struct journal_head *last_jh = jh->b_cpprev;
struct journal_head *next_jh = jh; struct journal_head *next_jh = jh;
do { do {
jh = next_jh; jh = next_jh;
next_jh = jh->b_cpnext; next_jh = jh->b_cpnext;
ret += __try_to_free_cp_buf(jh); /* Use trylock because of the ranking */
if (jbd_trylock_bh_state(jh2bh(jh)))
ret += __try_to_free_cp_buf(jh);
} while (jh != last_jh); } while (jh != last_jh);
} }
} while (transaction != last_transaction); } while (transaction != last_transaction);
......
...@@ -465,9 +465,10 @@ void journal_commit_transaction(journal_t *journal) ...@@ -465,9 +465,10 @@ void journal_commit_transaction(journal_t *journal)
* akpm: these are BJ_IO, and journal_datalist_lock is not needed. * akpm: these are BJ_IO, and journal_datalist_lock is not needed.
* See __journal_try_to_free_buffer. * See __journal_try_to_free_buffer.
*/ */
wait_for_iobuf: wait_for_iobuf:
while (commit_transaction->t_iobuf_list != NULL) { while (commit_transaction->t_iobuf_list != NULL) {
struct buffer_head *bh; struct buffer_head *bh;
jh = commit_transaction->t_iobuf_list->b_tprev; jh = commit_transaction->t_iobuf_list->b_tprev;
bh = jh2bh(jh); bh = jh2bh(jh);
if (buffer_locked(bh)) { if (buffer_locked(bh)) {
...@@ -479,7 +480,7 @@ void journal_commit_transaction(journal_t *journal) ...@@ -479,7 +480,7 @@ void journal_commit_transaction(journal_t *journal)
goto wait_for_iobuf; goto wait_for_iobuf;
} }
clear_bit(BH_JWrite, &jh2bh(jh)->b_state); clear_buffer_jwrite(bh);
JBUFFER_TRACE(jh, "ph4: unfile after journal write"); JBUFFER_TRACE(jh, "ph4: unfile after journal write");
journal_unfile_buffer(jh); journal_unfile_buffer(jh);
...@@ -495,7 +496,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -495,7 +496,6 @@ void journal_commit_transaction(journal_t *journal)
* ->t_iobuf_list should contain only dummy buffer_heads * ->t_iobuf_list should contain only dummy buffer_heads
* which were created by journal_write_metadata_buffer(). * which were created by journal_write_metadata_buffer().
*/ */
bh = jh2bh(jh);
BUFFER_TRACE(bh, "dumping temporary bh"); BUFFER_TRACE(bh, "dumping temporary bh");
journal_put_journal_head(jh); journal_put_journal_head(jh);
__brelse(bh); __brelse(bh);
...@@ -637,6 +637,8 @@ void journal_commit_transaction(journal_t *journal) ...@@ -637,6 +637,8 @@ void journal_commit_transaction(journal_t *journal)
struct buffer_head *bh; struct buffer_head *bh;
jh = commit_transaction->t_forget; jh = commit_transaction->t_forget;
bh = jh2bh(jh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction); jh->b_transaction == journal->j_running_transaction);
...@@ -650,7 +652,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -650,7 +652,6 @@ void journal_commit_transaction(journal_t *journal)
* *
* Otherwise, we can just throw away the frozen data now. * Otherwise, we can just throw away the frozen data now.
*/ */
jbd_lock_bh_state(jh2bh(jh));
if (jh->b_committed_data) { if (jh->b_committed_data) {
kfree(jh->b_committed_data); kfree(jh->b_committed_data);
jh->b_committed_data = NULL; jh->b_committed_data = NULL;
...@@ -676,7 +677,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -676,7 +677,6 @@ void journal_commit_transaction(journal_t *journal)
* by journal_forget, it may no longer be dirty and * by journal_forget, it may no longer be dirty and
* there's no point in keeping a checkpoint record for * there's no point in keeping a checkpoint record for
* it. */ * it. */
bh = jh2bh(jh);
/* A buffer which has been freed while still being /* A buffer which has been freed while still being
* journaled by a previous transaction may end up still * journaled by a previous transaction may end up still
......
...@@ -259,56 +259,6 @@ static void journal_kill_thread(journal_t *journal) ...@@ -259,56 +259,6 @@ static void journal_kill_thread(journal_t *journal)
} }
} }
#if 0
This is no longer needed - we do it in commit quite efficiently.
Note that if this function is resurrected, the loop needs to
be reorganised into the next_jh/last_jh algorithm.
/*
 * journal_clean_data_list: cleanup after data IO.
 *
 * Once the IO system has finished writing the buffers on the transaction's
 * data list, we can remove those buffers from the list. This function
 * scans the list for such buffers and removes them cleanly.
 *
 * We assume that the journal is already locked.
 * We are called with journal_datalist_lock held.
 *
 * AKPM: This function looks inefficient. Approximately O(n^2)
 * for potentially thousands of buffers. It no longer shows on profiles
 * because these buffers are mainly dropped in journal_commit_transaction().
 *
 * NOTE(review): this function is compiled out (#if 0) — kept only as
 * reference. If resurrected it needs the next_jh/last_jh walk and the
 * b_transaction/bh_state locking rules added by this patch.
 */
void __journal_clean_data_list(transaction_t *transaction)
{
struct journal_head *jh, *next;
assert_spin_locked(&journal_datalist_lock);
restart:
jh = transaction->t_sync_datalist;
if (!jh)
goto out;
do {
/* Save the successor first: unfiling jh unlinks it from the list. */
next = jh->b_tnext;
if (!buffer_locked(jh2bh(jh)) && !buffer_dirty(jh2bh(jh))) {
/* IO has completed and the buffer is clean: drop it from
 * the sync-data list and release our journal_head ref. */
struct buffer_head *bh = jh2bh(jh);
BUFFER_TRACE(bh, "data writeout complete: unfile");
__journal_unfile_buffer(jh);
jh->b_transaction = NULL;
journal_remove_journal_head(bh);
__brelse(bh);
/* List changed under us — rescan from the head (this is the
 * O(n^2) behaviour noted above). */
goto restart;
}
jh = next;
} while (transaction->t_sync_datalist &&
jh != transaction->t_sync_datalist);
out:
return;
}
#endif
/* /*
* journal_write_metadata_buffer: write a metadata buffer to the journal. * journal_write_metadata_buffer: write a metadata buffer to the journal.
* *
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment