Commit e2fd9d35 authored by Paul E. McKenney's avatar Paul E. McKenney

rcu: Remove expedited GP funnel-lock bypass

Commit #cdacbe1f ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need.  This commit therefore removes this optimization.
Signed-off-by: default avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 4f415302
...@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of ...@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872 s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
These fields are as follows: These fields are as follows:
o "s" is the sequence number, with an odd number indicating that o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress. an expedited grace period is in progress.
o "wd0", "wd1", "wd2", and "wd3" are the number of times that an o "wd1", "wd2", and "wd3" are the number of times that an attempt
attempt to start an expedited grace period found that someone to start an expedited grace period found that someone else had
else had completed an expedited grace period that satisfies the completed an expedited grace period that satisfies the attempted
attempted request. "Our work is done." request. "Our work is done."
o "n" is number of times that a concurrent CPU-hotplug operation o "n" is number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period. forced a fallback to a normal grace period.
......
...@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) ...@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
struct rcu_node *rnp0; struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL; struct rcu_node *rnp1 = NULL;
/*
* First try directly acquiring the root lock in order to reduce
* latency in the common case where expedited grace periods are
* rare. We check mutex_is_locked() to avoid pathological levels of
* memory contention on ->exp_funnel_mutex in the heavy-load case.
*/
rnp0 = rcu_get_root(rsp);
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
rnp0->grplo, rnp0->grphi,
TPS("acq"));
if (sync_exp_work_done(rsp, rnp0, NULL,
&rdp->expedited_workdone0, s))
return NULL;
return rnp0;
}
}
/* /*
* Each pass through the following loop works its way * Each pass through the following loop works its way
* up the rcu_node tree, returning if others have done the * up the rcu_node tree, returning if others have done the
......
...@@ -388,7 +388,6 @@ struct rcu_data { ...@@ -388,7 +388,6 @@ struct rcu_data {
struct rcu_head oom_head; struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
struct mutex exp_funnel_mutex; struct mutex exp_funnel_mutex;
atomic_long_t expedited_workdone0; /* # done by others #0. */
atomic_long_t expedited_workdone1; /* # done by others #1. */ atomic_long_t expedited_workdone1; /* # done by others #1. */
atomic_long_t expedited_workdone2; /* # done by others #2. */ atomic_long_t expedited_workdone2; /* # done by others #2. */
atomic_long_t expedited_workdone3; /* # done by others #3. */ atomic_long_t expedited_workdone3; /* # done by others #3. */
......
...@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v) ...@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
int cpu; int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private; struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp; struct rcu_data *rdp;
unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0; unsigned long s1 = 0, s2 = 0, s3 = 0;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu); rdp = per_cpu_ptr(rsp->rda, cpu);
s0 += atomic_long_read(&rdp->expedited_workdone0);
s1 += atomic_long_read(&rdp->expedited_workdone1); s1 += atomic_long_read(&rdp->expedited_workdone1);
s2 += atomic_long_read(&rdp->expedited_workdone2); s2 += atomic_long_read(&rdp->expedited_workdone2);
s3 += atomic_long_read(&rdp->expedited_workdone3); s3 += atomic_long_read(&rdp->expedited_workdone3);
} }
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
rsp->expedited_sequence, s0, s1, s2, s3, rsp->expedited_sequence, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal), atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs), atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2); rsp->expedited_sequence / 2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment