Commit 33a31641 authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Don't allow TDP MMU to yield when recovering NX pages

Prevent the TDP MMU from yielding when zapping a gfn range during NX
page recovery.  If a flush is pending from a previous invocation of the
zapping helper, either in the TDP MMU or the legacy MMU, but the TDP MMU
has not accumulated a flush for the current invocation, then yielding
will release mmu_lock with stale TLB entries.

That being said, this isn't technically a bug fix in the current code, as
the TDP MMU will never yield in this case.  tdp_mmu_iter_cond_resched()
will yield if and only if it has made forward progress, as defined by the
current gfn vs. the last yielded (or starting) gfn.  Because zapping a
single shadow page is guaranteed to (a) find that page and (b) step
sideways at the level of the shadow page, the TDP iter will break its loop
before getting a chance to yield.

But that is all very, very subtle, and will break at the slightest sneeze,
e.g. zapping while holding mmu_lock for read would break as the TDP MMU
wouldn't be guaranteed to see the present shadow page, and thus could step
sideways at a lower level.
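
To make the forward-progress rule concrete: the yield decision boils down to comparing the iterator's current target gfn against the gfn at which it last yielded (or started). The fragment below is a simplified, paraphrased sketch of that check, not the exact upstream tdp_mmu_iter_cond_resched(); the iterator field names are illustrative and the resched/locking helpers are the usual kernel primitives.

    /*
     * Simplified sketch: yield only if the walk has advanced past the gfn at
     * which it last yielded or began.  (The real code also restarts the
     * iterator after reacquiring mmu_lock; that part is omitted here.)
     */
    static bool tdp_mmu_iter_cond_resched_sketch(struct kvm *kvm,
                                                 struct tdp_iter *iter,
                                                 bool flush)
    {
            /* No forward progress since the last yield: do not yield. */
            if (iter->next_last_level_gfn == iter->yielded_gfn)
                    return false;

            if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                    /* Flush what this walk accumulated before dropping the lock. */
                    if (flush)
                            kvm_flush_remote_tlbs(kvm);

                    cond_resched_rwlock_write(&kvm->mmu_lock);

                    /* Record the yield point so the next check requires progress. */
                    iter->yielded_gfn = iter->next_last_level_gfn;
                    return true;
            }

            return false;
    }

In the single-shadow-page case the walk zaps the one relevant SPTE and terminates before any iteration in which the progress check could pass, which is why the current code never yields there.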

Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210325200119.1359384-4-seanjc@google.com>
[Add lockdep assertion. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 048f4980
arch/x86/kvm/mmu/mmu.c
@@ -5885,7 +5885,6 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	unsigned int ratio;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
-	gfn_t gfn_end;
 	ulong to_zap;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5907,8 +5906,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 				      lpage_disallowed_link);
 		WARN_ON_ONCE(!sp->lpage_disallowed);
 		if (is_tdp_mmu_page(sp)) {
-			gfn_end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
-			flush = kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, gfn_end);
+			flush = kvm_tdp_mmu_zap_sp(kvm, sp);
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 			WARN_ON_ONCE(sp->lpage_disallowed);
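For context, the hunk above lives in a loop that accumulates a pending TLB flush across iterations and only issues it when it is about to drop mmu_lock (and once more after the loop). The sketch below is an illustrative paraphrase of that caller pattern, not the exact kvm_recover_nx_lpages(); it shows why the zap helper invoked mid-loop must not yield while "flush" from an earlier iteration may still be pending.

    static void nx_recovery_loop_sketch(struct kvm *kvm, ulong to_zap)
    {
            struct kvm_mmu_page *sp;
            LIST_HEAD(invalid_list);
            bool flush = false;

            write_lock(&kvm->mmu_lock);
            for ( ; to_zap; --to_zap) {
                    if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
                            break;

                    sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
                                          struct kvm_mmu_page,
                                          lpage_disallowed_link);

                    /* "flush" may already be true here from a previous iteration. */
                    if (is_tdp_mmu_page(sp))
                            flush = kvm_tdp_mmu_zap_sp(kvm, sp); /* must not yield */
                    else
                            kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);

                    if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                            /* Issue the accumulated flush before dropping the lock. */
                            kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                            cond_resched_rwlock_write(&kvm->mmu_lock);
                            flush = false;
                    }
            }
            kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
            write_unlock(&kvm->mmu_lock);
    }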
arch/x86/kvm/mmu/tdp_mmu.c
@@ -723,13 +723,14 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield)
 {
 	struct kvm_mmu_page *root;
 	bool flush = false;
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root)
-		flush = zap_gfn_range(kvm, root, start, end, true, flush);
+		flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);
 
 	return flush;
 }
arch/x86/kvm/mmu/tdp_mmu.h
@@ -8,7 +8,29 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+				 bool can_yield);
+static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start,
+					     gfn_t end)
+{
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, start, end, true);
+}
+static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+
+	/*
+	 * Don't allow yielding, as the caller may have a flush pending.  Note,
+	 * if mmu_lock is held for write, zapping will never yield in this case,
+	 * but explicitly disallow it for safety.  The TDP MMU does not yield
+	 * until it has made forward progress (steps sideways), and when zapping
+	 * a single shadow page that it's guaranteed to see (thus the mmu_lock
+	 * requirement), its "step sideways" will always step beyond the bounds
+	 * of the shadow page's gfn range and stop iterating before yielding.
+	 */
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	return __kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, end, false);
+}
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
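With the header change above, __kvm_tdp_mmu_zap_gfn_range() gains two wrappers: the existing range zap, which is allowed to yield, and the new single-page zap, which is not. The fragment below is a hypothetical usage sketch; the surrounding variables (gfn_start, gfn_end, sp, flush) are illustrative and not taken from this commit.

    /* Range zap: yielding is allowed, the walk tracks and issues its own flush. */
    if (kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end))
            kvm_flush_remote_tlbs(kvm);

    /*
     * Single-page zap during NX recovery: the caller may already have a flush
     * pending, so yielding is disallowed and mmu_lock must be held for write
     * (enforced by the lockdep assertion in the helper).
     */
    flush = kvm_tdp_mmu_zap_sp(kvm, sp);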