Commit 79f3e3b5 authored by Like Xu, committed by Paolo Bonzini

KVM: x86/pmu: Reprogram PEBS event to emulate guest PEBS counter

When a guest counter is configured as a PEBS counter through
IA32_PEBS_ENABLE, it is reprogrammed as a guest PEBS event by
setting a non-zero precision level in the perf_event_attr.
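
As a rough illustration (not part of the patch), a user-space sketch of
this mapping, using hypothetical stand-in types rather than the kernel's
struct kvm_pmu and struct perf_event_attr:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for the kernel's kvm_pmu and perf_event_attr. */
struct pmu_state { uint64_t pebs_enable; };	/* guest IA32_PEBS_ENABLE image */
struct event_attr { int precise_ip; };

static void program_counter(const struct pmu_state *pmu, int idx,
			    struct event_attr *attr)
{
	/* Counter idx is a PEBS counter iff its IA32_PEBS_ENABLE bit is set. */
	bool pebs = pmu->pebs_enable & (1ULL << idx);

	/* A non-zero precision level turns the host event into a PEBS event. */
	attr->precise_ip = pebs ? 1 : 0;
}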

The guest PEBS overflow PMI bit is set in the guest
GLOBAL_STATUS MSR when the PEBS facility generates a PEBS
overflow PMI based on the guest IA32_DS_AREA MSR.
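
Continuing the sketch above (counter_overflowed is a hypothetical helper,
but GLOBAL_STATUS_BUFFER_OVF_BIT is the kernel's name for bit 62 of
IA32_PERF_GLOBAL_STATUS): a PEBS overflow is reported through the
buffer-overflow bit of the guest GLOBAL_STATUS image rather than the
per-counter bit, and the PMI is skipped when that bit was already pending:

#define GLOBAL_STATUS_BUFFER_OVF_BIT 62	/* OvfBuf bit in IA32_PERF_GLOBAL_STATUS */

/* Returns true if an overflow PMI should be injected into the guest;
 * global_status is the guest's GLOBAL_STATUS MSR image. */
static bool counter_overflowed(uint64_t *global_status, int idx, bool precise)
{
	if (precise) {
		uint64_t bit = 1ULL << GLOBAL_STATUS_BUFFER_OVF_BIT;
		bool was_pending = *global_status & bit;

		*global_status |= bit;	/* indicate PEBS overflow to the guest */
		return !was_pending;	/* skip the PMI if already pending */
	}

	*global_status |= 1ULL << idx;	/* ordinary per-counter overflow bit */
	return true;
}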

Even with the same counter index and the same event code and
mask, guest PEBS events will not be reused for non-PEBS events.
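
The rule is enforced in the pmc_resume_counter() hunk below; as a sketch
with the same hypothetical types:

/* A paused host event created with precise_ip != 0 is a PEBS event and must
 * not be resumed for a counter whose IA32_PEBS_ENABLE bit has since been
 * cleared; the caller drops it and creates a fresh event instead. */
static bool event_is_reusable(const struct pmu_state *pmu, int idx, int precise_ip)
{
	bool pebs_wanted = pmu->pebs_enable & (1ULL << idx);

	return !(precise_ip && !pebs_wanted);
}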
Originally-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Message-Id: <20220411101946.20262-9-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent c59a1f10
@@ -86,15 +86,22 @@ static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
 static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	bool skip_pmi = false;
 
 	/* Ignore counters that have been reprogrammed already. */
 	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
 		return;
 
-	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
+		/* Indicate PEBS overflow PMI to guest. */
+		skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
+					      (unsigned long *)&pmu->global_status);
+	} else {
+		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+	}
 	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
-	if (!pmc->intr)
+	if (!pmc->intr || skip_pmi)
 		return;
 
 	/*
@@ -124,6 +131,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 				  u64 config, bool exclude_user,
 				  bool exclude_kernel, bool intr)
 {
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	struct perf_event *event;
 	struct perf_event_attr attr = {
 		.type = type,
@@ -135,6 +143,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 		.exclude_kernel = exclude_kernel,
 		.config = config,
 	};
+	bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
 
 	if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
 		return;
@@ -150,6 +159,23 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 		 */
 		attr.sample_period = 0;
 	}
+	if (pebs) {
+		/*
+		 * The non-zero precision level of the guest event makes the
+		 * ordinary guest event become a guest PEBS event and triggers
+		 * the host PEBS PMI handler to determine whether the PEBS
+		 * overflow PMI comes from the host counters or the guest.
+		 *
+		 * For most PEBS hardware events, the difference in the software
+		 * precision levels of guest and host PEBS events will not affect
+		 * the accuracy of the PEBS profiling result, because the "event IP"
+		 * in the PEBS record is calibrated on the guest side.
+		 *
+		 * On Icelake everything is fine. Other hardware (GLC+, TNT+) that
+		 * could possibly care here is unsupported and needs changes.
+		 */
+		attr.precise_ip = 1;
+	}
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
 						 kvm_perf_overflow, pmc);
@@ -163,7 +189,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	pmc_to_pmu(pmc)->event_count++;
 	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	pmc->is_paused = false;
-	pmc->intr = intr;
+	pmc->intr = intr || pebs;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -189,6 +215,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 			      get_sample_period(pmc, pmc->counter)))
 		return false;
 
+	if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
+	    pmc->perf_event->attr.precise_ip)
+		return false;
+
 	/* reuse perf_event to serve as pmc_reprogram_counter() does*/
 	perf_event_enable(pmc->perf_event);
 	pmc->is_paused = false;