Commit 1921f3aa authored by Like Xu, committed by Paolo Bonzini

KVM: x86: Use static calls to reduce kvm_pmu_ops overhead

Use static calls to improve kvm_pmu_ops performance, following the same
pattern and naming scheme used by kvm-x86-ops.h.

Here are the worst fenced_rdtsc() cycle counts for the kvm_pmu_ops
functions that are most often called (up to 7 digits of calls) when running
a single perf test case in a guest on an ICX 2.70GHz host (mitigations=on):

		|	legacy	|	static call
------------------------------------------------------------
.pmc_idx_to_pmc	|	1304840	|	994872 (+23%)
.pmc_is_enabled	|	978670	|	1011750 (-3%)
.msr_idx_to_pmc	|	47828	|	41690 (+12%)
.is_valid_msr	|	28786	|	30108 (-4%)
Signed-off-by: Like Xu <likexu@tencent.com>
[sean: Handle static call updates in pmu.c, tweak changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220329235054.3534728-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 34886e79
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Include guard of the X-macro kind: the including file must define both
 * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() before including this
 * header. If either is missing, the BUILD_BUG_ON(1) below is emitted.
 */
#if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL)
BUILD_BUG_ON(1)
#endif
/*
 * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate
 * both DECLARE/DEFINE_STATIC_CALL() invocations and
 * "static_call_update()" calls.
 *
 * KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
 * a NULL definition, for example if "static_call_cond()" will be used
 * at the call sites.
 *
 * Each entry names one member of struct kvm_pmu_ops; the includer's macro
 * definition decides what is generated for that member.
 */
/*
 * Ops listed with KVM_X86_PMU_OP(): kvm_pmu_ops_update() WARNs when any of
 * these is NULL before updating the corresponding static call.
 */
KVM_X86_PMU_OP(pmc_perf_hw_id)
KVM_X86_PMU_OP(pmc_is_enabled)
KVM_X86_PMU_OP(pmc_idx_to_pmc)
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
KVM_X86_PMU_OP(msr_idx_to_pmc)
KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
KVM_X86_PMU_OP(is_valid_msr)
KVM_X86_PMU_OP(get_msr)
KVM_X86_PMU_OP(set_msr)
KVM_X86_PMU_OP(refresh)
KVM_X86_PMU_OP(init)
KVM_X86_PMU_OP(reset)
/* Optional ops: may be NULL; their call sites use static_call_cond(). */
KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
KVM_X86_PMU_OP_OPTIONAL(cleanup)
/*
 * Both macros are undefined here, so every includer has to define them
 * again before each inclusion of this header.
 */
#undef KVM_X86_PMU_OP
#undef KVM_X86_PMU_OP_OPTIONAL
...@@ -51,14 +51,28 @@ ...@@ -51,14 +51,28 @@
static struct kvm_pmu_ops kvm_pmu_ops __read_mostly; static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;
#define KVM_X86_PMU_OP(func) \
DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func, \
*(((struct kvm_pmu_ops *)0)->func));
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
{ {
memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops)); memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));
#define __KVM_X86_PMU_OP(func) \
static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
#define KVM_X86_PMU_OP(func) \
WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
#undef __KVM_X86_PMU_OP
} }
static inline bool pmc_is_enabled(struct kvm_pmc *pmc) static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
{ {
return kvm_pmu_ops.pmc_is_enabled(pmc); return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
} }
static void kvm_pmi_trigger_fn(struct irq_work *irq_work) static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
...@@ -225,7 +239,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) ...@@ -225,7 +239,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
ARCH_PERFMON_EVENTSEL_CMASK | ARCH_PERFMON_EVENTSEL_CMASK |
HSW_IN_TX | HSW_IN_TX |
HSW_IN_TX_CHECKPOINTED))) { HSW_IN_TX_CHECKPOINTED))) {
config = kvm_pmu_ops.pmc_perf_hw_id(pmc); config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
if (config != PERF_COUNT_HW_MAX) if (config != PERF_COUNT_HW_MAX)
type = PERF_TYPE_HARDWARE; type = PERF_TYPE_HARDWARE;
} }
...@@ -275,7 +289,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) ...@@ -275,7 +289,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
pmc->current_config = (u64)ctrl; pmc->current_config = (u64)ctrl;
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_pmu_ops.pmc_perf_hw_id(pmc), static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
!(en_field & 0x2), /* exclude user */ !(en_field & 0x2), /* exclude user */
!(en_field & 0x1), /* exclude kernel */ !(en_field & 0x1), /* exclude kernel */
pmi); pmi);
...@@ -284,7 +298,7 @@ EXPORT_SYMBOL_GPL(reprogram_fixed_counter); ...@@ -284,7 +298,7 @@ EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx) void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{ {
struct kvm_pmc *pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, pmc_idx); struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, pmc_idx);
if (!pmc) if (!pmc)
return; return;
...@@ -306,7 +320,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) ...@@ -306,7 +320,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
int bit; int bit;
for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
struct kvm_pmc *pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, bit); struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
if (unlikely(!pmc || !pmc->perf_event)) { if (unlikely(!pmc || !pmc->perf_event)) {
clear_bit(bit, pmu->reprogram_pmi); clear_bit(bit, pmu->reprogram_pmi);
...@@ -328,7 +342,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) ...@@ -328,7 +342,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
/* check if idx is a valid index to access PMU */ /* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{ {
return kvm_pmu_ops.is_valid_rdpmc_ecx(vcpu, idx); return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
} }
bool is_vmware_backdoor_pmc(u32 pmc_idx) bool is_vmware_backdoor_pmc(u32 pmc_idx)
...@@ -378,7 +392,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ...@@ -378,7 +392,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (is_vmware_backdoor_pmc(idx)) if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data); return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
pmc = kvm_pmu_ops.rdpmc_ecx_to_pmc(vcpu, idx, &mask); pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
if (!pmc) if (!pmc)
return 1; return 1;
...@@ -394,22 +408,21 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ...@@ -394,22 +408,21 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{ {
if (lapic_in_kernel(vcpu)) { if (lapic_in_kernel(vcpu)) {
if (kvm_pmu_ops.deliver_pmi) static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
kvm_pmu_ops.deliver_pmi(vcpu);
kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
} }
} }
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{ {
return kvm_pmu_ops.msr_idx_to_pmc(vcpu, msr) || return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
kvm_pmu_ops.is_valid_msr(vcpu, msr); static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
} }
static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{ {
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc = kvm_pmu_ops.msr_idx_to_pmc(vcpu, msr); struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);
if (pmc) if (pmc)
__set_bit(pmc->idx, pmu->pmc_in_use); __set_bit(pmc->idx, pmu->pmc_in_use);
...@@ -417,13 +430,13 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) ...@@ -417,13 +430,13 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{ {
return kvm_pmu_ops.get_msr(vcpu, msr_info); return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
} }
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{ {
kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
return kvm_pmu_ops.set_msr(vcpu, msr_info); return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
} }
/* refresh PMU settings. This function generally is called when underlying /* refresh PMU settings. This function generally is called when underlying
...@@ -432,7 +445,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -432,7 +445,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/ */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu) void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{ {
kvm_pmu_ops.refresh(vcpu); static_call(kvm_x86_pmu_refresh)(vcpu);
} }
void kvm_pmu_reset(struct kvm_vcpu *vcpu) void kvm_pmu_reset(struct kvm_vcpu *vcpu)
...@@ -440,7 +453,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu) ...@@ -440,7 +453,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
irq_work_sync(&pmu->irq_work); irq_work_sync(&pmu->irq_work);
kvm_pmu_ops.reset(vcpu); static_call(kvm_x86_pmu_reset)(vcpu);
} }
void kvm_pmu_init(struct kvm_vcpu *vcpu) void kvm_pmu_init(struct kvm_vcpu *vcpu)
...@@ -448,7 +461,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) ...@@ -448,7 +461,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
memset(pmu, 0, sizeof(*pmu)); memset(pmu, 0, sizeof(*pmu));
kvm_pmu_ops.init(vcpu); static_call(kvm_x86_pmu_init)(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0; pmu->event_count = 0;
pmu->need_cleanup = false; pmu->need_cleanup = false;
...@@ -480,14 +493,13 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu) ...@@ -480,14 +493,13 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
pmu->pmc_in_use, X86_PMC_IDX_MAX); pmu->pmc_in_use, X86_PMC_IDX_MAX);
for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) { for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, i); pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc)) if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
pmc_stop_counter(pmc); pmc_stop_counter(pmc);
} }
if (kvm_pmu_ops.cleanup) static_call_cond(kvm_x86_pmu_cleanup)(vcpu);
kvm_pmu_ops.cleanup(vcpu);
bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX); bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
} }
...@@ -517,7 +529,7 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, ...@@ -517,7 +529,7 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
unsigned int config; unsigned int config;
pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK); pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
config = kvm_pmu_ops.pmc_perf_hw_id(pmc); config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
pmc->eventsel = old_eventsel; pmc->eventsel = old_eventsel;
return config == perf_hw_id; return config == perf_hw_id;
} }
...@@ -545,7 +557,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id) ...@@ -545,7 +557,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
int i; int i;
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) { for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
pmc = kvm_pmu_ops.pmc_idx_to_pmc(pmu, i); pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc)) if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment