Commit f3650842 authored by Paolo Bonzini

Merge branch 'kvm-coco-pagefault-prep' into HEAD

A combination of prep work for TDX and SNP, and a clean up of the
page fault path to (hopefully) make it easier to follow the rules for
private memory, noslot faults, writes to read-only slots, etc.
parents 1e21b538 2b1f4355
@@ -254,28 +254,31 @@ enum x86_intercept_stage;
 				 KVM_GUESTDBG_INJECT_DB | \
 				 KVM_GUESTDBG_BLOCKIRQ)
 
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+#define PFERR_PRESENT_MASK	BIT(0)
+#define PFERR_WRITE_MASK	BIT(1)
+#define PFERR_USER_MASK		BIT(2)
+#define PFERR_RSVD_MASK		BIT(3)
+#define PFERR_FETCH_MASK	BIT(4)
+#define PFERR_PK_MASK		BIT(5)
+#define PFERR_SGX_MASK		BIT(15)
+
+#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK	BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK	BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK	BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK	BIT_ULL(36)
+
+/*
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that trigger implicit access.
+ */
+#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
+
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
 
 #define PFERR_NESTED_GUEST_PAGE	(PFERR_GUEST_PAGE_MASK |	\
 				 PFERR_WRITE_MASK |		\
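
The point of the new layout is that architectural #PF error-code bits and the SNP-defined bits all sit in the low 37 bits, while KVM's own synthetic flags occupy bits 48 and 49, so a synthetic flag can never collide with anything hardware reports in a 32-bit legacy #PF error code. A minimal standalone sketch of that invariant (BIT()/BIT_ULL() and the PFERR_* values are re-declared locally for illustration; this is not kernel code):

/* Sketch only: mirrors the PFERR_* layout above in a standalone program. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)			(1u << (n))
#define BIT_ULL(n)		(1ull << (n))

#define PFERR_PRESENT_MASK	BIT(0)
#define PFERR_WRITE_MASK	BIT(1)
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
#define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

/* Same check as the BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK)) below. */
static_assert((PFERR_SYNTHETIC_MASK & 0xffffffffull) == 0,
	      "synthetic flags must not overlap hardware #PF bits");

int main(void)
{
	/* A legacy #PF error code is delivered as a 32-bit value... */
	uint64_t hw_error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK;

	/* ...so KVM can tag it with synthetic state without ambiguity. */
	uint64_t tagged = hw_error_code | PFERR_PRIVATE_ACCESS;

	printf("hardware bits: %#llx, synthetic bits: %#llx\n",
	       (unsigned long long)(tagged & 0xffffffffull),
	       (unsigned long long)(tagged & PFERR_SYNTHETIC_MASK));
	return 0;
}

The static_assert plays the same role as the BUILD_BUG_ON added to kvm_handle_page_fault() further down in this merge.
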
@@ -1848,6 +1851,7 @@ struct kvm_arch_async_pf {
 	gfn_t gfn;
 	unsigned long cr3;
 	bool direct_map;
+	u64 error_code;
 };
 
 extern u32 __read_mostly kvm_nr_uret_msrs;
...
@@ -213,7 +213,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	 */
 	u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
 	bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
-	int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
+	int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1;
 	u32 errcode = PFERR_PRESENT_MASK;
 	bool fault;
@@ -234,8 +234,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
 
 		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
-		offset = (pfec & ~1) +
-			 ((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+		offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0);
 
 		pkru_bits &= mmu->pkru_mask >> offset;
 		errcode |= -pkru_bits & PFERR_PK_MASK;
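
The index change in permission_fault() is a behavioral no-op: pfec never has PFERR_RSVD_MASK set on this path, so adding not_smap shifted to bit 3 and OR-ing in PFERR_RSVD_MASK produce the same value; the rewrite simply drops the dependency on the now-removed PFERR_*_BIT defines. The same transformation is applied to the pkru_mask offset in the second hunk. A small standalone check of the equivalence (toy harness, not kernel code):

/*
 * Sketch only: shows that the old and new index computations in
 * permission_fault() agree whenever PFEC.RSVD is not already set
 * (it never is on this path).
 */
#include <stdint.h>
#include <stdio.h>

#define PFERR_RSVD_BIT	3
#define PFERR_RSVD_MASK	(1u << PFERR_RSVD_BIT)

static int index_old(uint32_t pfec, int not_smap)
{
	return (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
}

static int index_new(uint32_t pfec, int not_smap)
{
	return (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1;
}

int main(void)
{
	/* pfec on this path only carries P/W/U/F bits, never RSVD. */
	for (uint32_t pfec = 0; pfec < 0x20; pfec++) {
		if (pfec & PFERR_RSVD_MASK)
			continue;
		for (int not_smap = 0; not_smap <= 1; not_smap++) {
			if (index_old(pfec, not_smap) != index_new(pfec, not_smap)) {
				printf("mismatch at pfec=%#x not_smap=%d\n",
				       pfec, not_smap);
				return 1;
			}
		}
	}
	printf("old and new index computations agree\n");
	return 0;
}
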
...
@@ -3262,9 +3262,19 @@ static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
 {
 	gva_t gva = fault->is_tdp ? 0 : fault->addr;
 
+	if (fault->is_private) {
+		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+		return -EFAULT;
+	}
+
 	vcpu_cache_mmio_info(vcpu, gva, fault->gfn,
 			     access & shadow_mmio_access_mask);
 
+	fault->slot = NULL;
+	fault->pfn = KVM_PFN_NOSLOT;
+	fault->map_writable = false;
+	fault->hva = KVM_HVA_ERR_BAD;
+
 	/*
 	 * If MMIO caching is disabled, emulate immediately without
 	 * touching the shadow page tables as attempting to install an
@@ -4207,24 +4217,28 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 }
 
-static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-				    gfn_t gfn)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
+				    struct kvm_page_fault *fault)
 {
 	struct kvm_arch_async_pf arch;
 
 	arch.token = alloc_apf_token(vcpu);
-	arch.gfn = gfn;
+	arch.gfn = fault->gfn;
+	arch.error_code = fault->error_code;
 	arch.direct_map = vcpu->arch.mmu->root_role.direct;
 	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
 
-	return kvm_setup_async_pf(vcpu, cr2_or_gpa,
-				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+	return kvm_setup_async_pf(vcpu, fault->addr,
+				  kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
 }
 
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 {
 	int r;
 
+	if (WARN_ON_ONCE(work->arch.error_code & PFERR_PRIVATE_ACCESS))
+		return;
+
 	if ((vcpu->arch.mmu->root_role.direct != work->arch.direct_map) ||
 	    work->wakeup_all)
 		return;
@@ -4237,7 +4251,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	      work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
 		return;
 
-	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
+	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
 }
 
 static inline u8 kvm_max_level_for_order(int order)
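
The async-#PF change stashes the full 64-bit error code when a fault is turned into an async page fault and replays that code once the page is ready, where the old code replayed the fault with a hard-coded error code of 0; private faults are never queued this way, hence the WARN. A simplified userspace sketch of the stash-and-replay pattern (all names here are hypothetical stand-ins, not KVM symbols):

/*
 * Sketch only: the stash-and-replay pattern used for async page faults.
 * apf_work, queue_async_fault, replay_async_fault and do_page_fault are
 * illustrative names, not KVM symbols.
 */
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRIVATE_ACCESS (1ull << 49)

struct apf_work {
	uint64_t gpa;
	uint64_t gfn;
	uint64_t error_code;	/* stashed at queue time, replayed later */
};

static void queue_async_fault(struct apf_work *work, uint64_t gpa,
			      uint64_t error_code)
{
	work->gpa = gpa;
	work->gfn = gpa >> 12;
	work->error_code = error_code;	/* previously replayed as "0" */
}

static void do_page_fault(uint64_t gpa, uint64_t error_code)
{
	printf("fault at %#llx, error code %#llx\n",
	       (unsigned long long)gpa, (unsigned long long)error_code);
}

static void replay_async_fault(const struct apf_work *work)
{
	/* Private faults are never queued as async #PF; bail if one shows up. */
	if (work->error_code & PFERR_PRIVATE_ACCESS)
		return;

	do_page_fault(work->gpa, work->error_code);
}

int main(void)
{
	struct apf_work work;

	queue_async_fault(&work, 0x1234000, 0x3 /* present | write */);
	replay_async_fault(&work);
	return 0;
}
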
@@ -4257,14 +4271,6 @@ static inline u8 kvm_max_level_for_order(int order)
 	return PG_LEVEL_4K;
 }
 
-static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
-					      struct kvm_page_fault *fault)
-{
-	kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
-				      PAGE_SIZE, fault->write, fault->exec,
-				      fault->is_private);
-}
-
 static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
 				   struct kvm_page_fault *fault)
 {
@@ -4291,48 +4297,15 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
 static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
-	struct kvm_memory_slot *slot = fault->slot;
 	bool async;
 
-	/*
-	 * Retry the page fault if the gfn hit a memslot that is being deleted
-	 * or moved.  This ensures any existing SPTEs for the old memslot will
-	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
-	 */
-	if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
-		return RET_PF_RETRY;
-
-	if (!kvm_is_visible_memslot(slot)) {
-		/* Don't expose private memslots to L2. */
-		if (is_guest_mode(vcpu)) {
-			fault->slot = NULL;
-			fault->pfn = KVM_PFN_NOSLOT;
-			fault->map_writable = false;
-			return RET_PF_CONTINUE;
-		}
-		/*
-		 * If the APIC access page exists but is disabled, go directly
-		 * to emulation without caching the MMIO access or creating a
-		 * MMIO SPTE.  That way the cache doesn't need to be purged
-		 * when the AVIC is re-enabled.
-		 */
-		if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT &&
-		    !kvm_apicv_activated(vcpu->kvm))
-			return RET_PF_EMULATE;
-	}
-
-	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
-		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
-		return -EFAULT;
-	}
-
 	if (fault->is_private)
 		return kvm_faultin_pfn_private(vcpu, fault);
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+	fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, false,
+					  &async, fault->write,
+					  &fault->map_writable, &fault->hva);
 	if (!async)
 		return RET_PF_CONTINUE; /* *pfn has correct page already */
@@ -4342,7 +4315,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
 			return RET_PF_RETRY;
-		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
+		} else if (kvm_arch_setup_async_pf(vcpu, fault)) {
 			return RET_PF_RETRY;
 		}
 	}
@@ -4352,17 +4325,72 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	 * to wait for IO.  Note, gup always bails if it is unable to quickly
 	 * get a page and a fatal signal, i.e. SIGKILL, is pending.
 	 */
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+	fault->pfn = __gfn_to_pfn_memslot(fault->slot, fault->gfn, false, true,
+					  NULL, fault->write,
+					  &fault->map_writable, &fault->hva);
 	return RET_PF_CONTINUE;
 }
 
 static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			   unsigned int access)
 {
+	struct kvm_memory_slot *slot = fault->slot;
 	int ret;
 
+	/*
+	 * Note that the mmu_invalidate_seq also serves to detect a concurrent
+	 * change in attributes.  is_page_fault_stale() will detect an
+	 * invalidation related to fault->gfn and resume the guest without
+	 * installing a mapping in the page tables.
+	 */
+	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	/*
+	 * Now that we have a snapshot of mmu_invalidate_seq we can check for a
+	 * private vs. shared mismatch.
+	 */
+	if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
+		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+		return -EFAULT;
+	}
+
+	if (unlikely(!slot))
+		return kvm_handle_noslot_fault(vcpu, fault, access);
+
+	/*
+	 * Retry the page fault if the gfn hit a memslot that is being deleted
+	 * or moved.  This ensures any existing SPTEs for the old memslot will
+	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
+	 */
+	if (slot->flags & KVM_MEMSLOT_INVALID)
+		return RET_PF_RETRY;
+
+	if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) {
+		/*
+		 * Don't map L1's APIC access page into L2, KVM doesn't support
+		 * using APICv/AVIC to accelerate L2 accesses to L1's APIC,
+		 * i.e. the access needs to be emulated.  Emulating access to
+		 * L1's APIC is also correct if L1 is accelerating L2's own
+		 * virtual APIC, but for some reason L1 also maps _L1's_ APIC
+		 * into L2.  Note, vcpu_is_mmio_gpa() always treats access to
+		 * the APIC as MMIO.  Allow an MMIO SPTE to be created, as KVM
+		 * uses different roots for L1 vs. L2, i.e. there is no danger
+		 * of breaking APICv/AVIC for L1.
+		 */
+		if (is_guest_mode(vcpu))
+			return kvm_handle_noslot_fault(vcpu, fault, access);
+
+		/*
+		 * If the APIC access page exists but is disabled, go directly
+		 * to emulation without caching the MMIO access or creating a
+		 * MMIO SPTE.  That way the cache doesn't need to be purged
+		 * when the AVIC is re-enabled.
+		 */
+		if (!kvm_apicv_activated(vcpu->kvm))
+			return RET_PF_EMULATE;
+	}
+
 	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	smp_rmb();
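
Ordering is the subtle part of the reworked kvm_faultin_pfn(): the mmu_invalidate_seq snapshot (plus read barrier) is taken before the gfn's private/shared attribute is read, so an attribute change that races with the fault bumps the sequence count and is caught by the later staleness check instead of being silently missed. A simplified, userspace-style sketch of that snapshot/check/retry ordering (invalidate_seq, attr_is_private and install_mapping are illustrative stand-ins, not KVM code):

/*
 * Sketch only: the snapshot -> check -> retry-on-change ordering that the
 * reworked fault path relies on.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong invalidate_seq;	/* bumped whenever attributes change */
static atomic_bool  attr_is_private;	/* per-gfn attribute, one gfn here   */

static bool install_mapping(bool private, unsigned long snapshot)
{
	/* Retry if an invalidation (e.g. attribute change) raced with us. */
	if (atomic_load(&invalidate_seq) != snapshot)
		return false;
	printf("mapped as %s\n", private ? "private" : "shared");
	return true;
}

static int handle_fault(bool fault_is_private)
{
	/* 1. Snapshot the sequence count before reading the attribute. */
	unsigned long snapshot = atomic_load(&invalidate_seq);
	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() analogue */

	/* 2. Only now is it safe to compare fault type vs. attributes. */
	if (fault_is_private != atomic_load(&attr_is_private))
		return -1;	/* punt to userspace, like -EFAULT */

	/* 3. Any attribute change after the snapshot forces a retry. */
	return install_mapping(fault_is_private, snapshot) ? 0 : 1;
}

int main(void)
{
	atomic_store(&attr_is_private, true);
	printf("result: %d\n", handle_fault(true));
	return 0;
}
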
@@ -4387,8 +4415,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	 * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
 	 * to detect retry guarantees the worst case latency for the vCPU.
 	 */
-	if (fault->slot &&
-	    mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+	if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
 		return RET_PF_RETRY;
 
 	ret = __kvm_faultin_pfn(vcpu, fault);
@@ -4398,7 +4425,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	if (unlikely(is_error_pfn(fault->pfn)))
 		return kvm_handle_error_pfn(vcpu, fault);
 
-	if (unlikely(!fault->slot))
+	if (WARN_ON_ONCE(!fault->slot || is_noslot_pfn(fault->pfn)))
 		return kvm_handle_noslot_fault(vcpu, fault, access);
 
 	/*
@@ -4509,6 +4536,16 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	if (WARN_ON_ONCE(fault_address >> 32))
 		return -EFAULT;
 #endif
+	/*
+	 * Legacy #PF exceptions only have a 32-bit error code.  Simply drop the
+	 * upper bits as KVM doesn't use them for #PF (because they are never
+	 * set), and to ensure there are no collisions with KVM-defined bits.
+	 */
+	if (WARN_ON_ONCE(error_code >> 32))
+		error_code = lower_32_bits(error_code);
+
+	/* Ensure the above sanity check also covers KVM-defined flags. */
+	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
 
 	vcpu->arch.l1tf_flush_l1d = true;
 	if (!flags) {
@@ -5794,30 +5831,35 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 	int r, emulation_type = EMULTYPE_PF;
 	bool direct = vcpu->arch.mmu->root_role.direct;
 
-	/*
-	 * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
-	 * checks when emulating instructions that triggers implicit access.
-	 * WARN if hardware generates a fault with an error code that collides
-	 * with the KVM-defined value.  Clear the flag and continue on, i.e.
-	 * don't terminate the VM, as KVM can't possibly be relying on a flag
-	 * that KVM doesn't know about.
-	 */
-	if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
-		error_code &= ~PFERR_IMPLICIT_ACCESS;
-
 	if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
 		return RET_PF_RETRY;
 
+	/*
+	 * Except for reserved faults (emulated MMIO is shared-only), set the
+	 * PFERR_PRIVATE_ACCESS flag for software-protected VMs based on the
+	 * gfn's current attributes, which are the source of truth for such
+	 * VMs.  Note, this is wrong for nested MMUs as the GPA is an L2 GPA,
+	 * but KVM doesn't currently support nested virtualization (among many
+	 * other things) for software-protected VMs.
+	 */
+	if (IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) &&
+	    !(error_code & PFERR_RSVD_MASK) &&
+	    vcpu->kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM &&
+	    kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)))
+		error_code |= PFERR_PRIVATE_ACCESS;
+
 	r = RET_PF_INVALID;
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+		if (WARN_ON_ONCE(error_code & PFERR_PRIVATE_ACCESS))
+			return -EFAULT;
+
 		r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
 		if (r == RET_PF_EMULATE)
 			goto emulate;
 	}
 
 	if (r == RET_PF_INVALID) {
-		r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
-					  lower_32_bits(error_code), false,
+		r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, error_code, false,
 					  &emulation_type);
 		if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
 			return -EIO;
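
For KVM_X86_SW_PROTECTED_VM there is no hardware signal that an access targeted private memory, so kvm_mmu_page_fault() synthesizes PFERR_PRIVATE_ACCESS from the gfn's current attributes, and kvm_mmu_do_page_fault() (see the hunk below) derives fault.is_private purely from the error code instead of doing a second attribute lookup. A condensed sketch of that producer/consumer split (gfn_attr_is_private() and the pared-down struct are illustrative, not KVM code):

/*
 * Sketch only: how the private flag flows from the error-code producer to
 * the fault-struct consumer.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PFERR_RSVD_MASK      (1ull << 3)
#define PFERR_PRIVATE_ACCESS (1ull << 49)

struct page_fault {
	uint64_t error_code;
	bool is_private;
};

static bool gfn_attr_is_private(uint64_t gfn)
{
	return gfn >= 0x100;	/* arbitrary toy attribute map */
}

/* Producer: tag the error code for VM types with no hardware private bit. */
static uint64_t tag_error_code(uint64_t error_code, uint64_t gfn,
			       bool sw_protected_vm)
{
	if (sw_protected_vm && !(error_code & PFERR_RSVD_MASK) &&
	    gfn_attr_is_private(gfn))
		error_code |= PFERR_PRIVATE_ACCESS;
	return error_code;
}

/* Consumer: is_private comes from the error code, not a second lookup. */
static struct page_fault build_fault(uint64_t error_code)
{
	return (struct page_fault){
		.error_code = error_code,
		.is_private = error_code & PFERR_PRIVATE_ACCESS,
	};
}

int main(void)
{
	uint64_t err = tag_error_code(0x3, 0x200, true);
	struct page_fault fault = build_fault(err);

	printf("error code %#llx, is_private=%d\n",
	       (unsigned long long)fault.error_code, fault.is_private);
	return 0;
}
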
...
@@ -190,7 +190,7 @@ static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
 struct kvm_page_fault {
 	/* arguments to kvm_mmu_do_page_fault.  */
 	const gpa_t addr;
-	const u32 error_code;
+	const u64 error_code;
 	const bool prefetch;
 
 	/* Derived from error_code.  */
@@ -279,8 +279,16 @@ enum {
 	RET_PF_SPURIOUS,
 };
 
+static inline void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+						      struct kvm_page_fault *fault)
+{
+	kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
+				      PAGE_SIZE, fault->write, fault->exec,
+				      fault->is_private);
+}
+
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-					u32 err, bool prefetch, int *emulation_type)
+					u64 err, bool prefetch, int *emulation_type)
 {
 	struct kvm_page_fault fault = {
 		.addr = cr2_or_gpa,
@@ -298,7 +306,10 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		.max_level = KVM_MAX_HUGEPAGE_LEVEL,
 		.req_level = PG_LEVEL_4K,
 		.goal_level = PG_LEVEL_4K,
-		.is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
+		.is_private = err & PFERR_PRIVATE_ACCESS,
+
+		.pfn = KVM_PFN_ERR_FAULT,
+		.hva = KVM_HVA_ERR_BAD,
 	};
 	int r;
@@ -320,6 +331,17 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	else
 		r = vcpu->arch.mmu->page_fault(vcpu, &fault);
 
+	/*
+	 * Not sure what's happening, but punt to userspace and hope that
+	 * they can fix it by changing memory to shared, or they can
+	 * provide a better error.
+	 */
+	if (r == RET_PF_EMULATE && fault.is_private) {
+		pr_warn_ratelimited("kvm: unexpected emulation request on private memory\n");
+		kvm_mmu_prepare_memory_fault_exit(vcpu, &fault);
+		return -EFAULT;
+	}
+
 	if (fault.write_fault_to_shadow_pgtable && emulation_type)
 		*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
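
When a private fault would otherwise hit the emulator, KVM now exits to userspace with KVM_EXIT_MEMORY_FAULT and lets the VMM resolve the mismatch, e.g. by flipping the range's attributes. A hedged sketch of what that VMM-side handler might look like, assuming the guest_memfd-era KVM_EXIT_MEMORY_FAULT / KVM_SET_MEMORY_ATTRIBUTES UAPI (double-check the field names against the linux/kvm.h you build against):

/*
 * Sketch only: one possible VMM-side response to KVM_EXIT_MEMORY_FAULT.
 * Converting the range to shared mirrors the "changing memory to shared"
 * suggestion in the comment above; a real VMM applies its own policy.
 */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int handle_memory_fault_exit(int vm_fd, const struct kvm_run *run)
{
	struct kvm_memory_attributes attrs = {
		.address = run->memory_fault.gpa,
		.size = run->memory_fault.size,
		.attributes = 0,	/* 0 == shared; KVM_MEMORY_ATTRIBUTE_PRIVATE == private */
	};

	fprintf(stderr, "memory fault: gpa=%#llx size=%#llx %s access\n",
		(unsigned long long)run->memory_fault.gpa,
		(unsigned long long)run->memory_fault.size,
		(run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE) ?
			"private" : "shared");

	/* Update the attributes, then re-enter the vCPU to retry the fault. */
	return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}
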
...
@@ -260,7 +260,7 @@ TRACE_EVENT(
 	TP_STRUCT__entry(
 		__field(int, vcpu_id)
 		__field(gpa_t, cr2_or_gpa)
-		__field(u32, error_code)
+		__field(u64, error_code)
 		__field(u64 *, sptep)
 		__field(u64, old_spte)
 		__field(u64, new_spte)
...
@@ -2047,6 +2047,15 @@ static int npf_interception(struct kvm_vcpu *vcpu)
 	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
+	/*
+	 * WARN if hardware generates a fault with an error code that collides
+	 * with KVM-defined synthetic flags.  Clear the flags and continue on,
+	 * i.e. don't terminate the VM, as KVM can't possibly be relying on a
+	 * flag that KVM doesn't know about.
+	 */
+	if (WARN_ON_ONCE(error_code & PFERR_SYNTHETIC_MASK))
+		error_code &= ~PFERR_SYNTHETIC_MASK;
+
 	trace_kvm_page_fault(vcpu, fault_address, error_code);
 	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
 			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
...