Commit a931da6a authored by Theodore Ts'o's avatar Theodore Ts'o

jbd2: Change j_state_lock to be a rwlock_t

Lockstat reports have shown that j_state_lock is a major source of
lock contention, especially on systems with more than 4 CPU cores.  So
change it to be a read/write spinlock.
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent a51dca9c
......@@ -5066,7 +5066,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction;
tid_t tid;
spin_lock(&journal->j_state_lock);
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
......@@ -5075,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid;
ei->i_datasync_tid = tid;
}
......
......@@ -3232,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time;
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
else
......@@ -3241,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
}
static journal_t *ext4_get_journal(struct super_block *sb,
......
......@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
void __jbd2_log_wait_for_space(journal_t *journal)
{
int nblocks, space_left;
assert_spin_locked(&journal->j_state_lock);
/* assert_spin_locked(&journal->j_state_lock); */
nblocks = jbd_space_needed(journal);
while (__jbd2_log_space_left(journal) < nblocks) {
if (journal->j_flags & JBD2_ABORT)
return;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex);
/*
......@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
* filesystem, so abort the journal and leave a stack
* trace for forensic evidence.
*/
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
nblocks = jbd_space_needed(journal);
space_left = __jbd2_log_space_left(journal);
......@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
if (journal->j_committing_transaction)
tid = journal->j_committing_transaction->t_tid;
spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) {
......@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
WARN_ON(1);
jbd2_journal_abort(journal, 0);
}
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
} else {
spin_unlock(&journal->j_list_lock);
}
......@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* next transaction ID we will write, and where it will
* start. */
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions;
if (transaction) {
......@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* If the oldest pinned transaction is at the tail of the log
already then there's not much we can do right now. */
if (journal->j_tail_sequence == first_tid) {
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
return 1;
}
......@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_free += freed;
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
/*
* If there is an external journal, we need to make sure that
......
......@@ -152,9 +152,9 @@ static int journal_submit_commit_record(journal_t *journal,
printk(KERN_WARNING
"JBD2: Disabling barriers on %s, "
"not supported by device\n", journal->j_devname);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
/* And try again, without the barrier */
lock_buffer(bh);
......@@ -182,9 +182,9 @@ static int journal_wait_on_commit_record(journal_t *journal,
printk(KERN_WARNING
"JBD2: %s: disabling barries on %s - not supported "
"by device\n", __func__, journal->j_devname);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
lock_buffer(bh);
clear_buffer_dirty(bh);
......@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
/*
......@@ -424,9 +424,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
TASK_UNINTERRUPTIBLE);
if (atomic_read(&commit_transaction->t_updates)) {
spin_unlock(&commit_transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
schedule();
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
spin_lock(&commit_transaction->t_handle_lock);
}
finish_wait(&journal->j_wait_updates, &wait);
......@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head;
wake_up(&journal->j_wait_transaction_locked);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
jbd_debug (3, "JBD: commit phase 2\n");
......@@ -519,9 +519,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* transaction! Now comes the tricky part: we need to write out
* metadata. Loop over the transaction's entire buffer list:
*/
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction);
stats.run.rs_logging = jiffies;
......@@ -978,7 +978,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* __jbd2_journal_drop_transaction(). Otherwise we could race with
* other checkpointing code processing the transaction...
*/
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
/*
* Now recheck if some buffers did not get attached to the transaction
......@@ -986,7 +986,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/
if (commit_transaction->t_forget) {
spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
goto restart_loop;
}
......@@ -1038,7 +1038,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal->j_average_commit_time*3) / 4;
else
journal->j_average_commit_time = commit_time;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
if (commit_transaction->t_checkpoint_list == NULL &&
commit_transaction->t_checkpoint_io_list == NULL) {
......
This diff is collapsed.
......@@ -124,36 +124,38 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
jbd_debug(3, "New handle %p going live.\n", handle);
repeat:
/*
* We need to hold j_state_lock until t_updates has been incremented,
* for proper journal barrier handling
*/
spin_lock(&journal->j_state_lock);
repeat_locked:
repeat:
read_lock(&journal->j_state_lock);
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
kfree(new_transaction);
return -EROFS;
}
/* Wait on the journal's transaction barrier if necessary */
if (journal->j_barrier_count) {
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_transaction_locked,
journal->j_barrier_count == 0);
goto repeat;
}
if (!journal->j_running_transaction) {
if (!new_transaction) {
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
if (!new_transaction)
goto alloc_transaction;
write_lock(&journal->j_state_lock);
if (!journal->j_running_transaction) {
jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL;
}
jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL;
write_unlock(&journal->j_state_lock);
goto repeat;
}
transaction = journal->j_running_transaction;
......@@ -167,7 +169,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
prepare_to_wait(&journal->j_wait_transaction_locked,
&wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
......@@ -194,7 +196,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
__jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
......@@ -228,8 +230,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
spin_unlock(&transaction->t_handle_lock);
__jbd2_log_wait_for_space(journal);
goto repeat_locked;
read_unlock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
__jbd2_log_wait_for_space(journal);
write_unlock(&journal->j_state_lock);
goto repeat;
}
/* OK, account for the buffers that this operation expects to
......@@ -250,7 +256,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
atomic_read(&transaction->t_outstanding_credits),
__jbd2_log_space_left(journal));
spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
kfree(new_transaction);
......@@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
result = 1;
spin_lock(&journal->j_state_lock);
read_lock(&journal->j_state_lock);
/* Don't extend a locked-down transaction! */
if (handle->h_transaction->t_state != T_RUNNING) {
......@@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
unlock:
spin_unlock(&transaction->t_handle_lock);
error_out:
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
out:
return result;
}
......@@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
J_ASSERT(atomic_read(&transaction->t_updates) > 0);
J_ASSERT(journal_current_handle() == handle);
spin_lock(&journal->j_state_lock);
read_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock);
atomic_sub(handle->h_buffer_credits,
&transaction->t_outstanding_credits);
......@@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
jbd_debug(2, "restarting handle %p\n", handle);
__jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
......@@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
{
DEFINE_WAIT(wait);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
++journal->j_barrier_count;
/* Wait until there are no running updates */
......@@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal)
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
schedule();
finish_wait(&journal->j_wait_updates, &wait);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
}
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
/*
* We have now established a barrier against other normal updates, but
......@@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
J_ASSERT(journal->j_barrier_count != 0);
mutex_unlock(&journal->j_barrier);
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
--journal->j_barrier_count;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_transaction_locked);
}
......@@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle)
journal->j_last_sync_writer = pid;
spin_lock(&journal->j_state_lock);
read_lock(&journal->j_state_lock);
commit_time = journal->j_average_commit_time;
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
trans_time = ktime_to_ns(ktime_sub(ktime_get(),
transaction->t_start_time));
......@@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
......@@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
return ret;
} else {
/* There is no currently-running transaction. So the
......@@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
return ret;
} else {
/* The orphan record's transaction has
......@@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
return 0;
} else {
/* Good, the buffer belongs to the running transaction.
......@@ -1858,7 +1864,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
zap_buffer_unlocked:
clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
......@@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
/* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock);
read_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock);
read_unlock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock);
......
......@@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
if (osb->osb_commit_interval)
commit_interval = osb->osb_commit_interval;
spin_lock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock);
write_unlock(&journal->j_state_lock);
}
int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
......
......@@ -764,7 +764,7 @@ struct journal_s
/*
* Protect the various scalars in the journal
*/
spinlock_t j_state_lock;
rwlock_t j_state_lock;
/*
* Number of processes waiting to create a barrier lock [j_state_lock]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment