Commit 1c565833 authored by Peter Zijlstra, committed by Ingo Molnar

perf/x86/intel: Correct local vs remote sibling state

For some obscure reason the current code accounts the current SMT
thread's state on the remote thread and reads the remote's state on
the local SMT thread.

While internally consistent, and 'correct', it's pointless confusion we
can do without.

Flip them the right way around.
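
The convention after this change: each SMT thread accounts its own state
in states[tid] and only ever reads its sibling through states[tid ^ 1].
Below is a minimal stand-alone sketch of that indexing convention (not part
of the patch), with simplified hypothetical structures standing in for the
kernel's intel_excl_cntrs/intel_excl_states types:

#include <stdio.h>

/*
 * Hypothetical, simplified stand-ins for the kernel's intel_excl_cntrs and
 * intel_excl_states structures -- for illustration only.
 */
#define NUM_COUNTERS 4

enum excl_state { EXCL_UNUSED = 0, EXCL_SHARED, EXCL_EXCLUSIVE };

struct excl_states {
	enum excl_state state[NUM_COUNTERS];	/* per-counter state of one SMT thread */
};

struct excl_cntrs {
	struct excl_states states[2];		/* one slot per SMT sibling */
};

/* Account our own usage in the *local* slot, indexed by tid. */
static void mark_counter(struct excl_cntrs *c, int tid, int cntr, enum excl_state s)
{
	c->states[tid].state[cntr] = s;
}

/* Read the *remote* sibling's bookkeeping, indexed by tid ^ 1. */
static enum excl_state sibling_state(struct excl_cntrs *c, int tid, int cntr)
{
	return c->states[tid ^ 1].state[cntr];
}

int main(void)
{
	struct excl_cntrs c = { 0 };

	mark_counter(&c, 0, 2, EXCL_EXCLUSIVE);	/* thread 0 claims counter 2 */

	/* thread 1 sees the claim through the sibling lookup: prints 2 */
	printf("%d\n", sibling_state(&c, 1, 2));
	return 0;
}

With this split, each thread writes only its own slot and reads only the
sibling's, which is the layout the patch below moves the perf code to.
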
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent adafa999
@@ -1903,9 +1903,8 @@ static void
 intel_start_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -1919,7 +1918,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	if (!excl_cntrs)
 		return;
 
-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];
 
 	xl->sched_started = true;
@@ -1932,18 +1930,17 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	raw_spin_lock(&excl_cntrs->lock);
 
 	/*
-	 * save initial state of sibling thread
+	 * Save a copy of our state to work on.
 	 */
-	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+	memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
 }
 
 static void
 intel_stop_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -1956,13 +1953,12 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 	if (!excl_cntrs)
 		return;
 
-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];
 
 	/*
-	 * make new sibling thread state visible
+	 * Commit the working state.
 	 */
-	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+	memcpy(xl->state, xl->init_state, sizeof(xl->state));
 
 	xl->sched_started = false;
 	/*
@@ -1977,10 +1973,9 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 {
 	struct event_constraint *cx;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
-	int is_excl, i;
+	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* alternate */
+	int is_excl, i;
 
 	/*
 	 * validating a group does not require
@@ -1994,23 +1989,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 */
 	if (!excl_cntrs)
 		return c;
 
-	/*
-	 * event requires exclusive counter access
-	 * across HT threads
-	 */
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
-		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
-		if (!cpuc->n_excl++)
-			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
-	}
-
-	/*
-	 * xl = state of current HT
-	 * xlo = state of sibling HT
-	 */
-	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 
 	cx = c;
 
@@ -2053,6 +2031,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * of this function
 	 */
 
+	/*
+	 * state of sibling HT
+	 */
+	xlo = &excl_cntrs->states[tid ^ 1];
+
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
+
 	/*
 	 * Modify static constraint with current dynamic
 	 * state of thread
@@ -2067,14 +2061,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
-		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
 			__clear_bit(i, cx->idxmsk);
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
-		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
 			__clear_bit(i, cx->idxmsk);
 	}
 
@@ -2124,10 +2118,9 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xlo, *xl;
-	unsigned long flags = 0; /* keep compiler happy */
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
+	struct intel_excl_states *xl;
+	unsigned long flags = 0; /* keep compiler happy */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -2141,7 +2134,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 		return;
 
 	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 
 	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
 		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
 		if (!--cpuc->n_excl)
@@ -2161,7 +2153,7 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	 * counter state as unused now
 	 */
 	if (hwc->idx >= 0)
-		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
 
 	if (!xl->sched_started)
 		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
@@ -2200,16 +2192,12 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	struct event_constraint *c = cpuc->event_constraint[idx];
-	struct intel_excl_states *xlo, *xl;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
-	int is_excl;
 
 	if (cpuc->is_fake || !c)
 		return;
 
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
 		return;
 
@@ -2219,15 +2207,14 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 		return;
 
 	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 
 	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
 
 	if (cntr >= 0) {
-		if (is_excl)
-			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		if (c->flags & PERF_X86_EVENT_EXCL)
+			xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
 		else
-			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+			xl->init_state[cntr] = INTEL_EXCL_SHARED;
 	}
 }