Commit 530bfff6 authored by Kan Liang, committed by Peter Zijlstra

perf/x86/intel/lbr: Factor out a new struct for generic optimization

To reduce the overhead of a context switch with LBR enabled, some
generic optimizations were introduced, e.g. avoiding restoring the LBR
registers if no one else touched them. These generic optimizations can
also be used by Architecture LBR later. Currently, the fields for the
generic optimizations are part of struct x86_perf_task_context, which
will be deprecated by Architecture LBR. A new structure should be
introduced for the common fields of the generic optimizations, so that
it can be shared between Architecture LBR and model-specific LBR.
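
To make the factoring concrete, here is a minimal sketch (illustrative
only, not part of this patch) of how generic code could reach the shared
fields through a single accessor once every context layout embeds the
new struct. The task_context_opt() helper and its opaque void * context
argument are assumptions for illustration:

/*
 * Illustrative helper, not in this patch: generic code takes an
 * opaque context pointer and reaches the shared optimization state
 * without knowing which LBR implementation owns the layout.
 */
static inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
	/*
	 * Model-specific LBR: the opt struct is embedded in
	 * struct x86_perf_task_context. An Architecture LBR context
	 * would embed the same struct and could be handled here too.
	 */
	return &((struct x86_perf_task_context *)ctx)->opt;
}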

Both 'valid_lbrs' and 'tos' are also used by the generic optimizations,
but they are not moved into the new structure, because Architecture LBR
will not need them: since Architecture LBR is stack-like, 'valid_lbrs',
which records the index of the last valid LBR, is no longer required,
and the TOS MSR will be removed.

The LBR registers may be cleared in a deep C-state. If so, the generic
optimizations must not be applied and perf has to restore the LBR
registers unconditionally. A generic function is therefore required to
detect a reset caused by a deep C-state, so lbr_is_reset_in_cstate() is
introduced. For the model-specific LBR, the TOS MSR is used to detect
the reset; another method will be introduced for Architecture LBR
later.
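
For context, the existing rdlbr_from() helper in lbr.c, which
lbr_is_reset_in_cstate() builds on, reads roughly as follows (shown
for reference, not part of this diff):

/*
 * Read back the FROM_IP MSR of LBR entry @idx. A deep C-state that
 * powers down the LBR unit leaves these MSRs zeroed, so a zero
 * "from" address at the top of stack means the saved LBR state
 * must be restored unconditionally.
 */
static inline u64 rdlbr_from(unsigned int idx)
{
	u64 val;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return val;
}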
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1593780569-62993-6-git-send-email-kan.liang@linux.intel.com
parent 799571bf
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -355,33 +355,37 @@ void intel_pmu_lbr_restore(void *ctx)
 		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
 }
 
+static __always_inline bool
+lbr_is_reset_in_cstate(struct x86_perf_task_context *task_ctx)
+{
+	return !rdlbr_from(task_ctx->tos);
+}
+
 static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	u64 tos;
 
-	if (task_ctx->lbr_callstack_users == 0 ||
-	    task_ctx->lbr_stack_state == LBR_NONE) {
+	if (task_ctx->opt.lbr_callstack_users == 0 ||
+	    task_ctx->opt.lbr_stack_state == LBR_NONE) {
 		intel_pmu_lbr_reset();
 		return;
 	}
 
-	tos = task_ctx->tos;
 	/*
 	 * Does not restore the LBR registers, if
 	 * - No one else touched them, and
-	 * - Did not enter C6
+	 * - Was not cleared in Cstate
 	 */
 	if ((task_ctx == cpuc->last_task_ctx) &&
-	    (task_ctx->log_id == cpuc->last_log_id) &&
-	    rdlbr_from(tos)) {
-		task_ctx->lbr_stack_state = LBR_NONE;
+	    (task_ctx->opt.log_id == cpuc->last_log_id) &&
+	    !lbr_is_reset_in_cstate(task_ctx)) {
+		task_ctx->opt.lbr_stack_state = LBR_NONE;
 		return;
 	}
 
 	x86_pmu.lbr_restore(task_ctx);
 
-	task_ctx->lbr_stack_state = LBR_NONE;
+	task_ctx->opt.lbr_stack_state = LBR_NONE;
 }
 
 void intel_pmu_lbr_save(void *ctx)
@@ -415,17 +419,17 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (task_ctx->lbr_callstack_users == 0) {
-		task_ctx->lbr_stack_state = LBR_NONE;
+	if (task_ctx->opt.lbr_callstack_users == 0) {
+		task_ctx->opt.lbr_stack_state = LBR_NONE;
 		return;
 	}
 
 	x86_pmu.lbr_save(task_ctx);
 
-	task_ctx->lbr_stack_state = LBR_VALID;
+	task_ctx->opt.lbr_stack_state = LBR_VALID;
 
 	cpuc->last_task_ctx = task_ctx;
-	cpuc->last_log_id = ++task_ctx->log_id;
+	cpuc->last_log_id = ++task_ctx->opt.log_id;
 }
 
 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
@@ -447,8 +451,8 @@ void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
 	if (!prev_ctx_data || !next_ctx_data)
 		return;
 
-	swap(prev_ctx_data->lbr_callstack_users,
-	     next_ctx_data->lbr_callstack_users);
+	swap(prev_ctx_data->opt.lbr_callstack_users,
+	     next_ctx_data->opt.lbr_callstack_users);
 }
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -503,7 +507,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
 
 	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
 		task_ctx = event->ctx->task_ctx_data;
-		task_ctx->lbr_callstack_users++;
+		task_ctx->opt.lbr_callstack_users++;
 	}
 
 	/*
@@ -543,7 +547,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	if (branch_user_callstack(cpuc->br_sel) &&
 	    event->ctx->task_ctx_data) {
 		task_ctx = event->ctx->task_ctx_data;
-		task_ctx->lbr_callstack_users--;
+		task_ctx->opt.lbr_callstack_users--;
 	}
 
 	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -736,6 +736,12 @@ struct x86_pmu {
 	int (*aux_output_match) (struct perf_event *event);
 };
 
+struct x86_perf_task_context_opt {
+	int lbr_callstack_users;
+	int lbr_stack_state;
+	int log_id;
+};
+
 struct x86_perf_task_context {
 	u64 lbr_from[MAX_LBR_ENTRIES];
 	u64 lbr_to[MAX_LBR_ENTRIES];
@@ -743,9 +749,7 @@ struct x86_perf_task_context {
 	u64 lbr_sel;
 	int tos;
 	int valid_lbrs;
-	int lbr_callstack_users;
-	int lbr_stack_state;
-	int log_id;
+	struct x86_perf_task_context_opt opt;
 };
 
 #define x86_add_quirk(func_)						\
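
Putting both sides of the handshake together, here is a condensed
sketch of the generic optimization after this patch (assembled from
the hunks above; the sketch_* function names are placeholders,
everything else matches the diff):

/*
 * sched-out: remember which context this CPU saved last, and bump
 * the generation counter so a stale match cannot be mistaken for a
 * fresh one.
 */
static void sketch_save(struct cpu_hw_events *cpuc,
			struct x86_perf_task_context *task_ctx)
{
	cpuc->last_task_ctx = task_ctx;
	cpuc->last_log_id = ++task_ctx->opt.log_id;
}

/*
 * sched-in: skip the MSR restore only if this CPU performed the
 * most recent save of this very context (same pointer, same
 * generation) and nothing cleared the registers in between.
 */
static void sketch_restore(struct cpu_hw_events *cpuc,
			   struct x86_perf_task_context *task_ctx)
{
	if (task_ctx == cpuc->last_task_ctx &&
	    task_ctx->opt.log_id == cpuc->last_log_id &&
	    !lbr_is_reset_in_cstate(task_ctx)) {
		task_ctx->opt.lbr_stack_state = LBR_NONE;
		return;		/* hardware still holds the data */
	}

	x86_pmu.lbr_restore(task_ctx);
	task_ctx->opt.lbr_stack_state = LBR_NONE;
}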