Commit 407ab579 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:
   - fix compilation error when PMD and PUD are folded
   - fix regression in reads-as-zero behaviour of ID_AA64ZFR0_EL1
   - add aarch64 get-reg-list test

  x86:
   - fix semantic conflict between two series merged for 5.10
   - fix (and test) enforcement of paravirtual cpuid features

  selftests:
   - various cleanups to memory management selftests
   - new selftests testcase for performance of dirty logging"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  KVM: selftests: allow two iterations of dirty_log_perf_test
  KVM: selftests: Introduce the dirty log perf test
  KVM: selftests: Make the number of vcpus global
  KVM: selftests: Make the per vcpu memory size global
  KVM: selftests: Drop pointless vm_create wrapper
  KVM: selftests: Add wrfract to common guest code
  KVM: selftests: Simplify demand_paging_test with timespec_diff_now
  KVM: selftests: Remove address rounding in guest code
  KVM: selftests: Factor code out of demand_paging_test
  KVM: selftests: Use a single binary for dirty/clear log test
  KVM: selftests: Always clear dirty bitmap after iteration
  KVM: selftests: Add blessed SVE registers to get-reg-list
  KVM: selftests: Add aarch64 get-reg-list test
  selftests: kvm: test enforcement of paravirtual cpuid features
  selftests: kvm: Add exception handling to selftests
  selftests: kvm: Clear uc so UCALL_NONE is being properly reported
  selftests: kvm: Fix the segment descriptor layout to match the actual layout
  KVM: x86: handle MSR_IA32_DEBUGCTLMSR with report_ignored_msrs
  kvm: x86: request masterclock update any time guest uses different msr
  kvm: x86: ensure pv_cpuid.features is initialized when enabling cap
  ...
parents 3552c370 6d6a18fd
......@@ -6367,7 +6367,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
KVM_EXIT_X86_WRMSR exit notifications.
8.25 KVM_X86_SET_MSR_FILTER
8.27 KVM_X86_SET_MSR_FILTER
---------------------------
:Architectures: x86
......@@ -6381,8 +6381,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
8.26 KVM_CAP_ENFORCE_PV_CPUID
8.28 KVM_CAP_ENFORCE_PV_CPUID
-----------------------------
Architectures: x86
......
......@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
break;
fallthrough;
#endif
case CONT_PMD_SHIFT:
vma_shift = PMD_SHIFT;
fallthrough;
......
......@@ -1069,7 +1069,7 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu,
static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN;
}
#define __PTRAUTH_KEY(k) \
......@@ -1153,6 +1153,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
return val;
}
static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
(u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
switch (id) {
case SYS_ID_AA64ZFR0_EL1:
if (!vcpu_has_sve(vcpu))
return REG_RAZ;
break;
}
return 0;
}
/* cpufeature ID register access trap handlers */
static bool __access_id_reg(struct kvm_vcpu *vcpu,
......@@ -1171,7 +1187,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
return __access_id_reg(vcpu, p, r, false);
bool raz = sysreg_visible_as_raz(vcpu, r);
return __access_id_reg(vcpu, p, r, raz);
}
static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
......@@ -1192,72 +1210,7 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
if (vcpu_has_sve(vcpu))
return 0;
return REG_HIDDEN_USER | REG_HIDDEN_GUEST;
}
/* Visibility overrides for SVE-specific ID registers */
static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
if (vcpu_has_sve(vcpu))
return 0;
return REG_HIDDEN_USER;
}
/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
{
if (!vcpu_has_sve(vcpu))
return 0;
return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
}
static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
if (p->is_write)
return write_to_read_only(vcpu, p, rd);
p->regval = guest_id_aa64zfr0_el1(vcpu);
return true;
}
static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
u64 val;
if (WARN_ON(!vcpu_has_sve(vcpu)))
return -ENOENT;
val = guest_id_aa64zfr0_el1(vcpu);
return reg_to_user(uaddr, &val, reg->id);
}
static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
const u64 id = sys_reg_to_index(rd);
int err;
u64 val;
if (WARN_ON(!vcpu_has_sve(vcpu)))
return -ENOENT;
err = reg_from_user(&val, uaddr, id);
if (err)
return err;
/* This is what we mean by invariant: you can't change it. */
if (val != guest_id_aa64zfr0_el1(vcpu))
return -EINVAL;
return 0;
return REG_HIDDEN;
}
/*
......@@ -1299,13 +1252,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
return __get_id_reg(vcpu, rd, uaddr, false);
bool raz = sysreg_visible_as_raz(vcpu, rd);
return __get_id_reg(vcpu, rd, uaddr, raz);
}
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
return __set_id_reg(vcpu, rd, uaddr, false);
bool raz = sysreg_visible_as_raz(vcpu, rd);
return __set_id_reg(vcpu, rd, uaddr, raz);
}
static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
......@@ -1397,6 +1354,7 @@ static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
.access = access_id_reg, \
.get_user = get_id_reg, \
.set_user = set_id_reg, \
.visibility = id_visibility, \
}
/*
......@@ -1518,7 +1476,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_SANITISED(ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
{ SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility },
ID_SANITISED(ID_AA64ZFR0_EL1),
ID_UNALLOCATED(4,5),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
......@@ -2185,7 +2143,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
/* Check for regs disabled by runtime config */
if (sysreg_hidden_from_guest(vcpu, r)) {
if (sysreg_hidden(vcpu, r)) {
kvm_inject_undefined(vcpu);
return;
}
......@@ -2684,7 +2642,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
return get_invariant_sys_reg(reg->id, uaddr);
/* Check for regs disabled by runtime config */
if (sysreg_hidden_from_user(vcpu, r))
if (sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->get_user)
......@@ -2709,7 +2667,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
return set_invariant_sys_reg(reg->id, uaddr);
/* Check for regs disabled by runtime config */
if (sysreg_hidden_from_user(vcpu, r))
if (sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->set_user)
......@@ -2780,7 +2738,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
if (sysreg_hidden_from_user(vcpu, rd))
if (sysreg_hidden(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
......
......@@ -59,8 +59,8 @@ struct sys_reg_desc {
const struct sys_reg_desc *rd);
};
#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
......@@ -111,22 +111,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
__vcpu_sys_reg(vcpu, r->reg) = r->val;
}
static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu,
static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
return r->visibility(vcpu, r) & REG_HIDDEN_GUEST;
return r->visibility(vcpu, r) & REG_HIDDEN;
}
static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu,
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
return false;
return r->visibility(vcpu, r) & REG_HIDDEN_USER;
return r->visibility(vcpu, r) & REG_RAZ;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
......
......@@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
/*
* save the feature bitmap to avoid cpuid lookup for every PV
* operation
*/
if (best)
vcpu->arch.pv_cpuid.features = best->eax;
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
......@@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
/*
* save the feature bitmap to avoid cpuid lookup for every PV
* operation
*/
if (best)
vcpu->arch.pv_cpuid.features = best->eax;
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best)
......@@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
......
......@@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
......
......@@ -856,13 +856,15 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
} else {
rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
desc = desc->more;
while (desc->sptes[PTE_LIST_EXT-1]) {
count += PTE_LIST_EXT;
}
if (desc->sptes[PTE_LIST_EXT-1]) {
if (!desc->more) {
desc->more = mmu_alloc_pte_list_desc(vcpu);
desc = desc->more;
break;
}
desc = desc->more;
}
for (i = 0; desc->sptes[i]; ++i)
++count;
......
......@@ -255,10 +255,9 @@ static struct kmem_cache *x86_emulator_cache;
/*
* When called, it means the previous get/set msr reached an invalid msr.
* Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
* to fail the caller.
* Return true if we want to ignore/silent this failed msr access.
*/
static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
u64 data, bool write)
{
const char *op = write ? "wrmsr" : "rdmsr";
......@@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
op, msr, data);
/* Mask the error */
return 0;
return true;
} else {
kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
op, msr, data);
return -ENOENT;
return false;
}
}
......@@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
if (r == KVM_MSR_RET_INVALID) {
/* Unconditionally clear the output for simplicity */
*data = 0;
r = kvm_msr_ignored_check(vcpu, index, 0, false);
if (kvm_msr_ignored_check(vcpu, index, 0, false))
r = 0;
}
if (r)
......@@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
struct msr_data msr;
if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
return -EPERM;
return KVM_MSR_RET_FILTERED;
switch (index) {
case MSR_FS_BASE:
......@@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
if (ret == KVM_MSR_RET_INVALID)
ret = kvm_msr_ignored_check(vcpu, index, data, true);
if (kvm_msr_ignored_check(vcpu, index, data, true))
ret = 0;
return ret;
}
......@@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
int ret;
if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
return -EPERM;
return KVM_MSR_RET_FILTERED;
msr.index = index;
msr.host_initiated = host_initiated;
......@@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
if (ret == KVM_MSR_RET_INVALID) {
/* Unconditionally clear *data for simplicity */
*data = 0;
ret = kvm_msr_ignored_check(vcpu, index, 0, false);
if (kvm_msr_ignored_check(vcpu, index, 0, false))
ret = 0;
}
return ret;
......@@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
static u64 kvm_msr_reason(int r)
{
switch (r) {
case -ENOENT:
case KVM_MSR_RET_INVALID:
return KVM_MSR_EXIT_REASON_UNKNOWN;
case -EPERM:
case KVM_MSR_RET_FILTERED:
return KVM_MSR_EXIT_REASON_FILTER;
default:
return KVM_MSR_EXIT_REASON_INVAL;
......@@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
struct kvm_arch *ka = &vcpu->kvm->arch;
if (vcpu->vcpu_id == 0 && !host_initiated) {
if (ka->boot_vcpu_runs_old_kvmclock && old_msr)
if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
ka->boot_vcpu_runs_old_kvmclock = old_msr;
......@@ -3063,7 +3065,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Values other than LBR and BTF are vendor-specific,
thus reserved and should throw a #GP */
return 1;
}
} else if (report_ignored_msrs)
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
__func__, data);
break;
......@@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.efer;
break;
case MSR_KVM_WALL_CLOCK:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;
msr_info->data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_WALL_CLOCK_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
return 1;
msr_info->data = vcpu->kvm->arch.wall_clock;
break;
case MSR_KVM_SYSTEM_TIME:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;
msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_SYSTEM_TIME_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
return 1;
msr_info->data = vcpu->arch.time;
break;
case MSR_KVM_ASYNC_PF_EN:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
return 1;
msr_info->data = vcpu->arch.apf.msr_en_val;
break;
case MSR_KVM_ASYNC_PF_INT:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
return 1;
msr_info->data = 0;
break;
case MSR_KVM_STEAL_TIME:
if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
return 1;
msr_info->data = vcpu->arch.st.msr_val;
break;
case MSR_KVM_PV_EOI_EN:
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
return 1;
msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
case MSR_KVM_POLL_CONTROL:
if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
return 1;
msr_info->data = vcpu->arch.msr_kvm_poll_control;
break;
case MSR_IA32_P5_MC_ADDR:
......@@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
if (vcpu->arch.pv_cpuid.enforce)
kvm_update_pv_runtime(vcpu);
return 0;
......
......@@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
#define KVM_MSR_RET_INVALID 2
/*
* Internal error codes that are used to indicate that MSR emulation encountered
* an error that should result in #GP in the guest, unless userspace
* handles it.
*/
#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */
#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
......
# SPDX-License-Identifier: GPL-2.0-only
/aarch64/get-reg-list
/aarch64/get-reg-list-sve
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/kvm_pv_test
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
......@@ -24,6 +27,7 @@
/clear_dirty_log_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
/kvm_create_max_vcpus
/set_memory_region_test
/steal_time
......@@ -34,13 +34,14 @@ ifeq ($(ARCH),s390)
endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
......@@ -58,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
......@@ -111,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
include ../lib.mk
STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
$(AR) crs $@ $^
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
......
// SPDX-License-Identifier: GPL-2.0
#define REG_LIST_SVE
#include "get-reg-list.c"
This diff is collapsed.
#define USE_CLEAR_DIRTY_LOG
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
#include "dirty_log_test.c"
This diff is collapsed.
......@@ -128,6 +128,78 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
LOG_MODE_DIRTY_LOG = 0,
/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
LOG_MODE_CLEAR_LOG = 1,
LOG_MODE_NUM,
/* Run all supported modes */
LOG_MODE_ALL = LOG_MODE_NUM,
};
/* Mode of logging to test. Default is to run all supported modes */
static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;
static bool clear_log_supported(void)
{
return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
}
static void clear_log_create_vm_done(struct kvm_vm *vm)
{
struct kvm_enable_cap cap = {};
u64 manual_caps;
manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
cap.args[0] = manual_caps;
vm_enable_cap(vm, &cap);
}
static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages)
{
kvm_vm_get_dirty_log(vm, slot, bitmap);
}
static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages)
{
kvm_vm_get_dirty_log(vm, slot, bitmap);
kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
}
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
bool (*supported)(void);
/* Hook when the vm creation is done (before vcpu creation) */
void (*create_vm_done)(struct kvm_vm *vm);
/* Hook to collect the dirty pages into the bitmap provided */
void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
.collect_dirty_pages = dirty_log_collect_dirty_pages,
},
{
.name = "clear-log",
.supported = clear_log_supported,
.create_vm_done = clear_log_create_vm_done,
.collect_dirty_pages = clear_log_collect_dirty_pages,
},
};
/*
* We use this bitmap to track some pages that should have its dirty
* bit set in the _next_ iteration. For example, if we detected the
......@@ -137,6 +209,44 @@ static uint64_t host_track_next_count;
*/
static unsigned long *host_bmap_track;
static void log_modes_dump(void)
{
int i;
printf("all");
for (i = 0; i < LOG_MODE_NUM; i++)
printf(", %s", log_modes[i].name);
printf("\n");
}
static bool log_mode_supported(void)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->supported)
return mode->supported();
return true;
}
static void log_mode_create_vm_done(struct kvm_vm *vm)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->create_vm_done)
mode->create_vm_done(vm);
}
static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages)
{
struct log_mode *mode = &log_modes[host_log_mode];
TEST_ASSERT(mode->collect_dirty_pages != NULL,
"collect_dirty_pages() is required for any log mode!");
mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
}
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
......@@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
page);
}
if (test_bit_le(page, bmap)) {
if (test_and_clear_bit_le(page, bmap)) {
host_dirty_count++;
/*
* If the bit is set, the value written onto
......@@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
log_mode_create_vm_done(vm);
vm_vcpu_add_default(vm, vcpuid, guest_code);
return vm;
}
......@@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12
#ifdef USE_CLEAR_DIRTY_LOG
static u64 dirty_log_manual_caps;
#endif
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
unsigned long interval, uint64_t phys_offset)
{
......@@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
struct kvm_vm *vm;
unsigned long *bmap;
if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
log_modes[host_log_mode].name);
return;
}
/*
* We reserve page table for 2 times of extra dirty mem which
* will definitely cover the original (1G+) test range. Here
......@@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
bmap = bitmap_alloc(host_num_pages);
host_bmap_track = bitmap_alloc(host_num_pages);
#ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = {};
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
cap.args[0] = dirty_log_manual_caps;
vm_enable_cap(vm, &cap);
#endif
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
guest_test_phys_mem,
......@@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
while (iteration < iterations) {
/* Give the vcpu thread some time to dirty some pages */
usleep(interval * 1000);
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
#ifdef USE_CLEAR_DIRTY_LOG
kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
host_num_pages);
#endif
log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
bmap, host_num_pages);
vm_dirty_log_verify(mode, bmap);
iteration++;
sync_global_to_guest(vm, iteration);
......@@ -410,6 +512,9 @@ static void help(char *name)
TEST_HOST_LOOP_INTERVAL);
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
printf(" -M: specify the host logging mode "
"(default: run all log modes). Supported modes: \n\t");
log_modes_dump();
printf(" -m: specify the guest mode ID to test "
"(default: test all supported modes)\n"
" This option may be used multiple times.\n"
......@@ -429,18 +534,7 @@ int main(int argc, char *argv[])
bool mode_selected = false;
uint64_t phys_offset = 0;
unsigned int mode;
int opt, i;
#ifdef USE_CLEAR_DIRTY_LOG
dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
if (!dirty_log_manual_caps) {
print_skip("KVM_CLEAR_DIRTY_LOG not available");
exit(KSFT_SKIP);
}
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
#endif
int opt, i, j;
#ifdef __x86_64__
guest_mode_init(VM_MODE_PXXV48_4K, true, true);
......@@ -464,7 +558,7 @@ int main(int argc, char *argv[])
guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif
while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
switch (opt) {
case 'i':
iterations = strtol(optarg, NULL, 10);
......@@ -486,6 +580,26 @@ int main(int argc, char *argv[])
"Guest mode ID %d too big", mode);
guest_modes[mode].enabled = true;
break;
case 'M':
if (!strcmp(optarg, "all")) {
host_log_mode_option = LOG_MODE_ALL;
break;
}
for (i = 0; i < LOG_MODE_NUM; i++) {
if (!strcmp(optarg, log_modes[i].name)) {
pr_info("Setting log mode to: '%s'\n",
optarg);
host_log_mode_option = i;
break;
}
}
if (i == LOG_MODE_NUM) {
printf("Log mode '%s' invalid. Please choose "
"from: ", optarg);
log_modes_dump();
exit(1);
}
break;
case 'h':
default:
help(argv[0]);
......@@ -507,8 +621,19 @@ int main(int argc, char *argv[])
TEST_ASSERT(guest_modes[i].supported,
"Guest mode ID %d (%s) not supported.",
i, vm_guest_mode_string(i));
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (j = 0; j < LOG_MODE_NUM; j++) {
pr_info("Testing Log Mode '%s'\n",
log_modes[j].name);
host_log_mode = j;
run_test(i, iterations, interval, phys_offset);
}
} else {
host_log_mode = host_log_mode_option;
run_test(i, iterations, interval, phys_offset);
}
}
return 0;
}
......@@ -63,9 +63,11 @@ enum vm_mem_backing_src_type {
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_enable_cap *cap);
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
......@@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_guest_debug *debug);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state);
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
......@@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
memcpy(&(g), _p, sizeof(g)); \
})
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
/* Common ucalls */
enum {
UCALL_NONE,
......
// SPDX-License-Identifier: GPL-2.0
/*
* tools/testing/selftests/kvm/include/perf_test_util.h
*
* Copyright (C) 2020, Google LLC.
*/
#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
#define SELFTEST_KVM_PERF_TEST_UTIL_H
#include "kvm_util.h"
#include "processor.h"
#define MAX_VCPUS 512
#define PAGE_SHIFT_4K 12
#define PTES_PER_4K_PT 512
#define TEST_MEM_SLOT_INDEX 1
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */
/*
* Guest physical memory offset of the testing memory slot.
* This will be set to the topmost valid physical address minus
* the test memory size.
*/
static uint64_t guest_test_phys_mem;
/*
* Guest virtual memory offset of the testing memory slot.
* Must not conflict with identity mapped test code.
*/
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
/* Number of VCPUs for the test */
static int nr_vcpus = 1;
struct vcpu_args {
uint64_t gva;
uint64_t pages;
/* Only used by the host userspace part of the vCPU thread */
int vcpu_id;
};
struct perf_test_args {
struct kvm_vm *vm;
uint64_t host_page_size;
uint64_t guest_page_size;
int wr_fract;
struct vcpu_args vcpu_args[MAX_VCPUS];
};
static struct perf_test_args perf_test_args;
/*
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
static void guest_code(uint32_t vcpu_id)
{
struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
uint64_t gva;
uint64_t pages;
int i;
/* Make sure vCPU args data structure is not corrupt. */
GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
gva = vcpu_args->gva;
pages = vcpu_args->pages;
while (true) {
for (i = 0; i < pages; i++) {
uint64_t addr = gva + (i * perf_test_args.guest_page_size);
if (i % perf_test_args.wr_fract == 0)
*(uint64_t *)addr = 0x0123456789ABCDEF;
else
READ_ONCE(*(uint64_t *)addr);
}
GUEST_SYNC(1);
}
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
uint64_t vcpu_memory_bytes)
{
struct kvm_vm *vm;
uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
uint64_t guest_num_pages;
/* Account for a few pages per-vCPU for stacks */
pages += DEFAULT_STACK_PGS * vcpus;
/*
* Reserve twice the ammount of memory needed to map the test region and
* the page table / stacks region, at 4k, for page tables. Do the
* calculation with 4K page size: the smallest of all archs. (e.g., 64K
* page size guest will need even less memory for page tables).
*/
pages += (2 * pages) / PTES_PER_4K_PT;
pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
PTES_PER_4K_PT;
pages = vm_adjust_num_guest_pages(mode, pages);
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
vm = vm_create(mode, pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
perf_test_args.vm = vm;
perf_test_args.guest_page_size = vm_get_page_size(vm);
perf_test_args.host_page_size = getpagesize();
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
"Guest memory size is not guest page size aligned.");
guest_num_pages = (vcpus * vcpu_memory_bytes) /
perf_test_args.guest_page_size;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
/*
* If there should be more memory in the guest test region than there
* can be pages in the guest, it will definitely cause problems.
*/
TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
"Requested more guest memory than address space allows.\n"
" guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
guest_num_pages, vm_get_max_gfn(vm), vcpus,
vcpu_memory_bytes);
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
"Guest memory size is not host page size aligned.");
guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
perf_test_args.guest_page_size;
guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
#ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem &= ~((1 << 20) - 1);
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
/* Add an extra memory slot for testing */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
guest_test_phys_mem,
TEST_MEM_SLOT_INDEX,
guest_num_pages, 0);
/* Do mapping for the demand paging memory slot */
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
ucall_init(vm, NULL);
return vm;
}
static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
{
vm_paddr_t vcpu_gpa;
struct vcpu_args *vcpu_args;
int vcpu_id;
for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
vm_vcpu_add_default(vm, vcpu_id, guest_code);
#ifdef __x86_64__
vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
#endif
vcpu_args->vcpu_id = vcpu_id;
vcpu_args->gva = guest_test_virt_mem +
(vcpu_id * vcpu_memory_bytes);
vcpu_args->pages = vcpu_memory_bytes /
perf_test_args.guest_page_size;
vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
}
}
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
......@@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts);
struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
struct timespec timespec_diff_now(struct timespec start);
struct timespec timespec_div(struct timespec ts, int divisor);
#endif /* SELFTEST_KVM_TEST_UTIL_H */
......@@ -36,6 +36,8 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
#define UNEXPECTED_VECTOR_PORT 0xfff0u
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
......@@ -59,7 +61,7 @@ struct gpr64_regs {
struct desc64 {
uint16_t limit0;
uint16_t base0;
unsigned base1:8, s:1, type:4, dpl:2, p:1;
unsigned base1:8, type:4, s:1, dpl:2, p:1;
unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
uint32_t base3;
uint32_t zero1;
......@@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void)
return idt;
}
static inline void outl(uint16_t port, uint32_t value)
{
__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}
#define SET_XMM(__var, __xmm) \
asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
......@@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void);
uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
struct ex_regs {
uint64_t rax, rcx, rdx, rbx;
uint64_t rbp, rsi, rdi;
uint64_t r8, r9, r10, r11;
uint64_t r12, r13, r14, r15;
uint64_t vector;
uint64_t error_code;
uint64_t rip;
uint64_t cs;
uint64_t rflags;
};
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
void vm_handle_exception(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
* if a match was found and successfully overwritten.
* @cpuid: the kvm cpuid list to modify.
* @ent: cpuid entry to insert
*/
bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3);
/*
* Basic CPU control in CR0
*/
......
......@@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
}
......@@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
if (uc)
memset(uc, 0, sizeof(*uc));
if (run->exit_reason == KVM_EXIT_MMIO &&
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
vm_vaddr_t gva;
......
......@@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
return ret;
}
/* VCPU Enable Capability
*
* Input Args:
* vm - Virtual Machine
* vcpu_id - VCPU
* cap - Capability
*
* Output Args: None
*
* Return: On success, 0. On failure a TEST_ASSERT failure is produced.
*
* Enables a capability (KVM_CAP_*) on the VCPU.
*/
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_enable_cap *cap)
{
struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
int r;
TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
" rc: %i, errno: %i", r, errno);
return r;
}
static void vm_open(struct kvm_vm *vm, int perm)
{
vm->kvm_fd = open(KVM_DEV_PATH, perm);
......@@ -152,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
* descriptor to control the created VM is created with the permissions
* given by perm (e.g. O_RDWR).
*/
struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
struct kvm_vm *vm;
......@@ -243,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
return vm;
}
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
return _vm_create(mode, phy_pages, perm);
}
/*
* VM Restart
*
......@@ -1204,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
do {
rc = ioctl(vcpu->fd, KVM_RUN, NULL);
} while (rc == -1 && errno == EINTR);
assert_on_unhandled_exception(vm, vcpuid);
return rc;
}
......@@ -1260,6 +1286,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
"rc: %i errno: %i", ret, errno);
}
/*
* VM VCPU Get Reg List
*
* Input Args:
* vm - Virtual Machine
* vcpuid - VCPU ID
*
* Output Args:
* None
*
* Return:
* A pointer to an allocated struct kvm_reg_list
*
* Get the list of guest registers which are supported for
* KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
*/
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
{
struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
int ret;
ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
reg_list->n = reg_list_n.n;
vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
return reg_list;
}
/*
* VM VCPU Regs Get
*
......
......@@ -50,6 +50,8 @@ struct kvm_vm {
vm_paddr_t pgd;
vm_vaddr_t gdt;
vm_vaddr_t tss;
vm_vaddr_t idt;
vm_vaddr_t handlers;
};
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
......
......@@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
}
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
}
......@@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
if (uc)
memset(uc, 0, sizeof(*uc));
if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
......
......@@ -4,10 +4,13 @@
*
* Copyright (C) 2020, Google LLC.
*/
#include <stdlib.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>
#include "test_util.h"
/*
......@@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
struct timespec timespec_diff_now(struct timespec start)
{
struct timespec end;
clock_gettime(CLOCK_MONOTONIC, &end);
return timespec_sub(end, start);
}
struct timespec timespec_div(struct timespec ts, int divisor)
{
int64_t ns = timespec_to_ns(ts) / divisor;
return timespec_add_ns((struct timespec){0}, ns);
}
void print_skip(const char *fmt, ...)
{
va_list ap;
......
handle_exception:
push %r15
push %r14
push %r13
push %r12
push %r11
push %r10
push %r9
push %r8
push %rdi
push %rsi
push %rbp
push %rbx
push %rdx
push %rcx
push %rax
mov %rsp, %rdi
call route_exception
pop %rax
pop %rcx
pop %rdx
pop %rbx
pop %rbp
pop %rsi
pop %rdi
pop %r8
pop %r9
pop %r10
pop %r11
pop %r12
pop %r13
pop %r14
pop %r15
/* Discard vector and error code. */
add $16, %rsp
iretq
/*
* Build the handle_exception wrappers which push the vector/error code on the
* stack and an array of pointers to those wrappers.
*/
.pushsection .rodata
.globl idt_handlers
idt_handlers:
.popsection
.macro HANDLERS has_error from to
vector = \from
.rept \to - \from + 1
.align 8
/* Fetch current address and append it to idt_handlers. */
current_handler = .
.pushsection .rodata
.quad current_handler
.popsection
.if ! \has_error
pushq $0
.endif
pushq $vector
jmp handle_exception
vector = vector + 1
.endr
.endm
.global idt_handler_code
idt_handler_code:
HANDLERS has_error=0 from=0 to=7
HANDLERS has_error=1 from=8 to=8
HANDLERS has_error=0 from=9 to=9
HANDLERS has_error=1 from=10 to=14
HANDLERS has_error=0 from=15 to=16
HANDLERS has_error=1 from=17 to=17
HANDLERS has_error=0 from=18 to=255
.section .note.GNU-stack, "", %progbits
......@@ -12,9 +12,18 @@
#include "../kvm_util_internal.h"
#include "processor.h"
#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
#endif
#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
vm_vaddr_t exception_handlers;
/* Virtual translation table structure declarations */
struct pageMapL4Entry {
uint64_t present:1;
......@@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->limit0 = segp->limit & 0xFFFF;
desc->base0 = segp->base & 0xFFFF;
desc->base1 = segp->base >> 16;
desc->s = segp->s;
desc->type = segp->type;
desc->s = segp->s;
desc->dpl = segp->dpl;
desc->p = segp->present;
desc->limit1 = segp->limit >> 16;
desc->avl = segp->avl;
desc->l = segp->l;
desc->db = segp->db;
desc->g = segp->g;
......@@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
break;
......@@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
*va_bits = (entry->eax >> 8) & 0xff;
}
}
struct idt_entry {
uint16_t offset0;
uint16_t selector;
uint16_t ist : 3;
uint16_t : 5;
uint16_t type : 4;
uint16_t : 1;
uint16_t dpl : 2;
uint16_t p : 1;
uint16_t offset1;
uint32_t offset2; uint32_t reserved;
};
static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
int dpl, unsigned short selector)
{
struct idt_entry *base =
(struct idt_entry *)addr_gva2hva(vm, vm->idt);
struct idt_entry *e = &base[vector];
memset(e, 0, sizeof(*e));
e->offset0 = addr;
e->selector = selector;
e->ist = 0;
e->type = 14;
e->dpl = dpl;
e->p = 1;
e->offset1 = addr >> 16;
e->offset2 = addr >> 32;
}
void kvm_exit_unexpected_vector(uint32_t value)
{
outl(UNEXPECTED_VECTOR_PORT, value);
}
void route_exception(struct ex_regs *regs)
{
typedef void(*handler)(struct ex_regs *);
handler *handlers = (handler *)exception_handlers;
if (handlers && handlers[regs->vector]) {
handlers[regs->vector](regs);
return;
}
kvm_exit_unexpected_vector(regs->vector);
}
void vm_init_descriptor_tables(struct kvm_vm *vm)
{
extern void *idt_handlers;
int i;
vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
/* Handlers have the same address in both address spaces.*/
for (i = 0; i < NUM_INTERRUPTS; i++)
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
DEFAULT_CODE_SELECTOR);
}
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
{
struct kvm_sregs sregs;
vcpu_sregs_get(vm, vcpuid, &sregs);
sregs.idt.base = vm->idt;
sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
sregs.gdt.base = vm->gdt;
sregs.gdt.limit = getpagesize() - 1;
kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
vcpu_sregs_set(vm, vcpuid, &sregs);
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
void vm_handle_exception(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *))
{
vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
handlers[vector] = (vm_vaddr_t)handler;
}
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
&& vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
&& vcpu_state(vm, vcpuid)->io.size == 4) {
/* Grab pointer to io data */
uint32_t *data = (void *)vcpu_state(vm, vcpuid)
+ vcpu_state(vm, vcpuid)->io.data_offset;
TEST_ASSERT(false,
"Unexpected vectored event in guest (vector:0x%x)",
*data);
}
}
bool set_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *ent)
{
int i;
for (i = 0; i < cpuid->nent; i++) {
struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
if (cur->function != ent->function || cur->index != ent->index)
continue;
memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
return true;
}
return false;
}
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3)
{
uint64_t r;
asm volatile("vmcall"
: "=a"(r)
: "b"(a0), "c"(a1), "d"(a2), "S"(a3));
return r;
}
......@@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
if (uc)
memset(uc, 0, sizeof(*uc));
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020, Google LLC.
*
* Tests for KVM paravirtual feature disablement
*/
#include <asm/kvm_para.h>
#include <linux/kvm_para.h>
#include <stdint.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
extern unsigned char rdmsr_start;
extern unsigned char rdmsr_end;
static u64 do_rdmsr(u32 idx)
{
u32 lo, hi;
asm volatile("rdmsr_start: rdmsr;"
"rdmsr_end:"
: "=a"(lo), "=c"(hi)
: "c"(idx));
return (((u64) hi) << 32) | lo;
}
extern unsigned char wrmsr_start;
extern unsigned char wrmsr_end;
static void do_wrmsr(u32 idx, u64 val)
{
u32 lo, hi;
lo = val;
hi = val >> 32;
asm volatile("wrmsr_start: wrmsr;"
"wrmsr_end:"
: : "a"(lo), "c"(idx), "d"(hi));
}
static int nr_gp;
static void guest_gp_handler(struct ex_regs *regs)
{
unsigned char *rip = (unsigned char *)regs->rip;
bool r, w;
r = rip == &rdmsr_start;
w = rip == &wrmsr_start;
GUEST_ASSERT(r || w);
nr_gp++;
if (r)
regs->rip = (uint64_t)&rdmsr_end;
else
regs->rip = (uint64_t)&wrmsr_end;
}
struct msr_data {
uint32_t idx;
const char *name;
};
#define TEST_MSR(msr) { .idx = msr, .name = #msr }
#define UCALL_PR_MSR 0xdeadbeef
#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
/*
* KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
* written, as the KVM_CPUID_FEATURES leaf is cleared.
*/
static struct msr_data msrs_to_test[] = {
TEST_MSR(MSR_KVM_SYSTEM_TIME),
TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
TEST_MSR(MSR_KVM_WALL_CLOCK),
TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
TEST_MSR(MSR_KVM_ASYNC_PF_EN),
TEST_MSR(MSR_KVM_STEAL_TIME),
TEST_MSR(MSR_KVM_PV_EOI_EN),
TEST_MSR(MSR_KVM_POLL_CONTROL),
TEST_MSR(MSR_KVM_ASYNC_PF_INT),
TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
};
static void test_msr(struct msr_data *msr)
{
PR_MSR(msr);
do_rdmsr(msr->idx);
GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
nr_gp = 0;
do_wrmsr(msr->idx, 0);
GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
nr_gp = 0;
}
struct hcall_data {
uint64_t nr;
const char *name;
};
#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
#define UCALL_PR_HCALL 0xdeadc0de
#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
/*
* KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
* features have been cleared in KVM_CPUID_FEATURES.
*/
static struct hcall_data hcalls_to_test[] = {
TEST_HCALL(KVM_HC_KICK_CPU),
TEST_HCALL(KVM_HC_SEND_IPI),
TEST_HCALL(KVM_HC_SCHED_YIELD),
};
static void test_hcall(struct hcall_data *hc)
{
uint64_t r;
PR_HCALL(hc);
r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
GUEST_ASSERT(r == -KVM_ENOSYS);
}
static void guest_main(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
test_msr(&msrs_to_test[i]);
}
for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
test_hcall(&hcalls_to_test[i]);
}
GUEST_DONE();
}
static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
{
struct kvm_cpuid_entry2 ent = {0};
ent.function = KVM_CPUID_FEATURES;
TEST_ASSERT(set_cpuid(cpuid, &ent),
"failed to clear KVM_CPUID_FEATURES leaf");
}
static void pr_msr(struct ucall *uc)
{
struct msr_data *msr = (struct msr_data *)uc->args[0];
pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
}
static void pr_hcall(struct ucall *uc)
{
struct hcall_data *hc = (struct hcall_data *)uc->args[0];
pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
}
static void handle_abort(struct ucall *uc)
{
TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
__FILE__, uc->args[1]);
}
#define VCPU_ID 0
static void enter_guest(struct kvm_vm *vm)
{
struct kvm_run *run;
struct ucall uc;
int r;
run = vcpu_state(vm, VCPU_ID);
while (true) {
r = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"unexpected exit reason: %u (%s)",
run->exit_reason, exit_reason_str(run->exit_reason));
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_PR_MSR:
pr_msr(&uc);
break;
case UCALL_PR_HCALL:
pr_hcall(&uc);
break;
case UCALL_ABORT:
handle_abort(&uc);
return;
case UCALL_DONE:
return;
}
}
}
int main(void)
{
struct kvm_enable_cap cap = {0};
struct kvm_cpuid2 *best;
struct kvm_vm *vm;
if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
pr_info("will skip kvm paravirt restriction tests.\n");
return 0;
}
vm = vm_create_default(VCPU_ID, 0, guest_main);
cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
cap.args[0] = 1;
vcpu_enable_cap(vm, VCPU_ID, &cap);
best = kvm_get_supported_cpuid();
clear_kvm_cpuid_features(best);
vcpu_set_cpuid(vm, VCPU_ID, best);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
enter_guest(vm);
kvm_vm_free(vm);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment