Commit c4371c2a authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Return unique RET_PF_* values if the fault was fixed

Introduce RET_PF_FIXED and RET_PF_SPURIOUS to provide unique return
values instead of overloading RET_PF_RETRY.  In the short term, the
unique values add clarity to the code and RET_PF_SPURIOUS will be used
by set_spte() to avoid unnecessary work for spurious faults.

In the long term, TDX will use RET_PF_FIXED to deterministically map
memory during pre-boot.  The page fault flow may bail early for benign
reasons, e.g. if the mmu_notifier fires for an unrelated address.  With
only RET_PF_RETRY, it's impossible for the caller to distinguish between
"cool, page is mapped" and "darn, need to try again", and thus cannot
handle benign cases like the mmu_notifier retry.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200923220425.18402-4-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 83a2ba4c
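
To make the motivation concrete, here is a minimal sketch, not taken from this patch or from the TDX series, of how a caller that wants to deterministically map a GPA could consume the distinct return values. pre_map_gpa() is a made-up helper, and the sketch assumes the RET_PF_* values are visible to the caller (as of this commit they are internal to mmu.c).

/*
 * Hypothetical pre-mapping helper (illustration only): retry benign bails,
 * treat both "newly fixed" and "already fixed" as success, and report
 * anything else as a hard failure.
 */
static int pre_map_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code)
{
	int r;

	do {
		/* May bail with RET_PF_RETRY, e.g. if an mmu_notifier fired. */
		r = kvm_mmu_do_page_fault(vcpu, gpa, error_code, false);
	} while (r == RET_PF_RETRY);

	if (r == RET_PF_FIXED || r == RET_PF_SPURIOUS)
		return 0;	/* the GPA is mapped, possibly by another vCPU */

	return r < 0 ? r : -EIO;	/* real error, or an unexpected RET_PF_* */
}

With only RET_PF_RETRY overloaded to mean both "mapped" and "try again", the loop above would have no reliable exit condition.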
@@ -198,17 +198,20 @@ module_param(dbg, bool, 0644);
 #define PTE_LIST_EXT 3
 
 /*
- * Return values of handle_mmio_page_fault and mmu.page_fault:
+ * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
+ *
  * RET_PF_RETRY: let CPU fault again on the address.
  * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
- *
- * For handle_mmio_page_fault only:
  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+ * RET_PF_FIXED: The faulting entry has been fixed.
+ * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
  */
 enum {
 	RET_PF_RETRY = 0,
-	RET_PF_EMULATE = 1,
-	RET_PF_INVALID = 2,
+	RET_PF_EMULATE,
+	RET_PF_INVALID,
+	RET_PF_FIXED,
+	RET_PF_SPURIOUS,
 };
 
 struct pte_list_desc {
@@ -3083,7 +3086,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	int was_rmapped = 0;
 	int rmap_count;
 	int set_spte_ret;
-	int ret = RET_PF_RETRY;
+	int ret = RET_PF_FIXED;
 	bool flush = false;
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
@@ -3491,21 +3494,19 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
 }
 
 /*
- * Return value:
- * - true: let the vcpu to access on the same address again.
- * - false: let the real page fault path to fix it.
+ * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
  */
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-			    u32 error_code)
+static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+			   u32 error_code)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
-	bool fault_handled = false;
+	int ret = RET_PF_INVALID;
 	u64 spte = 0ull;
 	uint retry_count = 0;
 
 	if (!page_fault_can_be_fast(error_code))
-		return false;
+		return ret;
 
 	walk_shadow_page_lockless_begin(vcpu);
 
@@ -3531,7 +3532,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		 * they are always ACC_ALL.
 		 */
 		if (is_access_allowed(error_code, spte)) {
-			fault_handled = true;
+			ret = RET_PF_SPURIOUS;
 			break;
 		}
 
@@ -3574,11 +3575,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		 * since the gfn is not stable for indirect shadow page. See
 		 * Documentation/virt/kvm/locking.rst to get more detail.
 		 */
-		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
-							iterator.sptep, spte,
-							new_spte);
-		if (fault_handled)
+		if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
+					    new_spte)) {
+			ret = RET_PF_FIXED;
 			break;
+		}
 
 		if (++retry_count > 4) {
 			printk_once(KERN_WARNING
@@ -3589,10 +3590,10 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	} while (true);
 
 	trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
-			      spte, fault_handled);
+			      spte, ret);
 	walk_shadow_page_lockless_end(vcpu);
 
-	return fault_handled;
+	return ret;
 }
 
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -4104,8 +4105,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
 
-	if (fast_page_fault(vcpu, gpa, error_code))
-		return RET_PF_RETRY;
+	r = fast_page_fault(vcpu, gpa, error_code);
+	if (r != RET_PF_INVALID)
+		return r;
 
 	r = mmu_topup_memory_caches(vcpu, false);
 	if (r)
@@ -244,14 +244,11 @@ TRACE_EVENT(
 		  __entry->access)
 );
 
-#define __spte_satisfied(__spte)				\
-	(__entry->retry && is_writable_pte(__entry->__spte))
-
 TRACE_EVENT(
 	fast_page_fault,
 	TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
-		 u64 *sptep, u64 old_spte, bool retry),
-	TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
+		 u64 *sptep, u64 old_spte, int ret),
+	TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, ret),
 
 	TP_STRUCT__entry(
 		__field(int, vcpu_id)
@@ -260,7 +257,7 @@ TRACE_EVENT(
 		__field(u64 *, sptep)
 		__field(u64, old_spte)
 		__field(u64, new_spte)
-		__field(bool, retry)
+		__field(int, ret)
 	),
 
 	TP_fast_assign(
@@ -270,7 +267,7 @@ TRACE_EVENT(
 		__entry->sptep = sptep;
 		__entry->old_spte = old_spte;
 		__entry->new_spte = *sptep;
-		__entry->retry = retry;
+		__entry->ret = ret;
 	),
 
 	TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
@@ -278,7 +275,7 @@ TRACE_EVENT(
 		  __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
 				  kvm_mmu_trace_pferr_flags), __entry->sptep,
 		  __entry->old_spte, __entry->new_spte,
-		  __spte_satisfied(old_spte), __spte_satisfied(new_spte)
+		  __entry->ret == RET_PF_SPURIOUS, __entry->ret == RET_PF_FIXED
 	)
 );