Commit 74eedeba authored by Linus Torvalds

Merge tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "A set of perf fixes:

   - Correct the permission checks for perf events which send SIGTRAP to
     a different process, and clean up that code to be more readable
     (see the illustrative sketch after this message).

   - Prevent an out-of-bounds MSR access in the x86 perf code, which
     happened due to an incomplete limiting to the actually available
     hardware counters.

   - Prevent access to the AMD64_EVENTSEL_HOSTONLY bit when running
     inside a guest.

   - Handle small-core counter re-enabling correctly by issuing an ACK
     right before re-enabling it, to prevent a stale PEBS record from
     being kept around"

* tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Apply mid ACK for small core
  perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest
  perf/x86: Fix out of bound MSR access
  perf: Refactor permissions check into perf_check_permission()
  perf: Fix required permissions if sigtrap is requested
parents 66745863 acade637
arch/x86/events/core.c
@@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void)
 		return;
 
 	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
-		/* Metrics and fake events don't have corresponding HW counters. */
-		if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
-			continue;
-		else if (i >= INTEL_PMC_IDX_FIXED)
-			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
-		else
-			wrmsrl(x86_pmu_event_addr(i), 0);
+		if (i >= INTEL_PMC_IDX_FIXED) {
+			/* Metrics and fake events don't have corresponding HW counters. */
+			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+				continue;
+
+			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+		} else {
+			wrmsrl(x86_pmu_event_addr(i), 0);
+		}
 	}
 
 	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
arch/x86/events/intel/core.c
@@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
-	struct cpu_hw_events *cpuc;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
 	int loops;
 	u64 status;
 	int handled;
 	int pmu_enabled;
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
-
 	/*
 	 * Save the PMU state.
 	 * It needs to be restored when leaving the handler.
 	 */
 	pmu_enabled = cpuc->enabled;
 	/*
-	 * No known reason to not always do late ACK,
-	 * but just in case do it opt-in.
+	 * In general, the early ACK is only applied for old platforms.
+	 * For the big core starts from Haswell, the late ACK should be
+	 * applied.
+	 * For the small core after Tremont, we have to do the ACK right
+	 * before re-enabling counters, which is in the middle of the
+	 * NMI handler.
 	 */
-	if (!x86_pmu.late_ack)
+	if (!late_ack && !mid_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
 	cpuc->enabled = 0;
@@ -2958,6 +2962,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		goto again;
 
 done:
+	if (mid_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
 	cpuc->enabled = pmu_enabled;
 	if (pmu_enabled)
@@ -2969,7 +2975,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	 * have been reset. This avoids spurious NMIs on
 	 * Haswell CPUs.
 	 */
-	if (x86_pmu.late_ack)
+	if (late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
@@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
 		static_branch_enable(&perf_is_hybrid);
 		x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
 
-		x86_pmu.late_ack = true;
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
@@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
 		pmu->name = "cpu_core";
 		pmu->cpu_type = hybrid_big;
+		pmu->late_ack = true;
 		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
 			pmu->num_counters = x86_pmu.num_counters + 2;
 			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
@@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
 		pmu->name = "cpu_atom";
 		pmu->cpu_type = hybrid_small;
+		pmu->mid_ack = true;
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 		pmu->max_pebs_events = x86_pmu.max_pebs_events;
arch/x86/events/perf_event.h
@@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
 	struct event_constraint		*event_constraints;
 	struct event_constraint		*pebs_constraints;
 	struct extra_reg		*extra_regs;
+
+	unsigned int			late_ack	:1,
+					mid_ack		:1,
+					enabled_ack	:1;
 };
 
 static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
 	__Fp;						\
 }))
 
+#define hybrid_bit(_pmu, _field)			\
+({							\
+	bool __Fp = x86_pmu._field;			\
+							\
+	if (is_hybrid() && (_pmu))			\
+		__Fp = hybrid_pmu(_pmu)->_field;	\
+							\
+	__Fp;						\
+})
+
 enum hybrid_pmu_type {
 	hybrid_big		= 0x40,
 	hybrid_small		= 0x20,
@@ -755,6 +769,7 @@ struct x86_pmu {
 
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
+			mid_ack			:1,
 			enabled_ack		:1;
 	/*
 	 * sysfs attrs
@@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
+	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base, hwc->config);
+	wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
 
 	if (is_counter_pair(hwc))
 		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
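
The hybrid_bit() helper added above mirrors the existing hybrid() field accessor: when the kernel runs on a hybrid part and a valid struct pmu pointer is passed, it reads the bit from the matching x86_hybrid_pmu; otherwise it falls back to the global x86_pmu copy. A minimal sketch of the call pattern, as consumed by the intel_pmu_handle_irq() hunk earlier in this diff (the trailing comments are annotations, not kernel source):

	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);	/* true when cpuc->pmu is the big-core (cpu_core) PMU, or when x86_pmu.late_ack is set */
	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);		/* true when cpuc->pmu is the small-core (cpu_atom) PMU */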
kernel/events/core.c
@@ -11917,6 +11917,37 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
 	return gctx;
 }
 
+static bool
+perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
+{
+	unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
+	bool is_capable = perfmon_capable();
+
+	if (attr->sigtrap) {
+		/*
+		 * perf_event_attr::sigtrap sends signals to the other task.
+		 * Require the current task to also have CAP_KILL.
+		 */
+		rcu_read_lock();
+		is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
+		rcu_read_unlock();
+
+		/*
+		 * If the required capabilities aren't available, checks for
+		 * ptrace permissions: upgrade to ATTACH, since sending signals
+		 * can effectively change the target task.
+		 */
+		ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
+	}
+
+	/*
+	 * Preserve ptrace permission check for backwards compatibility. The
+	 * ptrace check also includes checks that the current task and other
+	 * task have matching uids, and is therefore not done here explicitly.
+	 */
+	return is_capable || ptrace_may_access(task, ptrace_mode);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -12163,15 +12194,13 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_file;
 
 		/*
-		 * Preserve ptrace permission check for backwards compatibility.
-		 *
 		 * We must hold exec_update_lock across this and any potential
 		 * perf_install_in_context() call for this new event to
 		 * serialize against exec() altering our credentials (and the
 		 * perf_event_exit_task() that could imply).
 		 */
 		err = -EACCES;
-		if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+		if (!perf_check_permission(&attr, task))
 			goto err_cred;
 	}
 