Commit d25ceb92 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini

KVM: x86/mmu: Track the number of TDP MMU pages, but not the actual pages

Track the number of TDP MMU "shadow" pages instead of tracking the pages
themselves. With the NX huge page list manipulation moved out of the common
linking flow, elminating the list-based tracking means the happy path of
adding a shadow page doesn't need to acquire a spinlock and can instead
inc/dec an atomic.

Keep the tracking as the WARN during TDP MMU teardown on leaked shadow
pages is very, very useful for detecting KVM bugs.

Tracking the number of pages will also make it trivial to expose the
counter to userspace as a stat in the future, which may or may not be
desirable.

Note, the TDP MMU needs to use a separate counter (and stat if that ever
comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother
supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the
TDP MMU consumes so few pages relative to shadow paging), and including TDP
MMU pages in that counter would break both the shrinker and shadow MMUs,
e.g. if a VM is using nested TDP.

Cc: Yan Zhao <yan.y.zhao@intel.com>
Reviewed-by: default avatarMingwei Zhang <mizhang@google.com>
Reviewed-by: default avatarDavid Matlack <dmatlack@google.com>
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Reviewed-by: default avatarYan Zhao <yan.y.zhao@intel.com>
Message-Id: <20221019165618.927057-6-seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 61f94478
...@@ -1298,6 +1298,9 @@ struct kvm_arch { ...@@ -1298,6 +1298,9 @@ struct kvm_arch {
*/ */
bool tdp_mmu_enabled; bool tdp_mmu_enabled;
/* The number of TDP MMU pages across all roots. */
atomic64_t tdp_mmu_pages;
/* /*
* List of kvm_mmu_page structs being used as roots. * List of kvm_mmu_page structs being used as roots.
* All kvm_mmu_page structs in the list should have * All kvm_mmu_page structs in the list should have
...@@ -1318,18 +1321,10 @@ struct kvm_arch { ...@@ -1318,18 +1321,10 @@ struct kvm_arch {
*/ */
struct list_head tdp_mmu_roots; struct list_head tdp_mmu_roots;
/*
* List of kvm_mmu_page structs not being used as roots.
* All kvm_mmu_page structs in the list should have
* tdp_mmu_page set and a tdp_mmu_root_count of 0.
*/
struct list_head tdp_mmu_pages;
/* /*
* Protects accesses to the following fields when the MMU lock * Protects accesses to the following fields when the MMU lock
* is held in read mode: * is held in read mode:
* - tdp_mmu_roots (above) * - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
* - the link field of kvm_mmu_page structs used by the TDP MMU * - the link field of kvm_mmu_page structs used by the TDP MMU
* - possible_nx_huge_pages; * - possible_nx_huge_pages;
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
......
...@@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) ...@@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
kvm->arch.tdp_mmu_enabled = true; kvm->arch.tdp_mmu_enabled = true;
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
kvm->arch.tdp_mmu_zap_wq = wq; kvm->arch.tdp_mmu_zap_wq = wq;
return 1; return 1;
} }
...@@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) ...@@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
/* Also waits for any queued work items. */ /* Also waits for any queued work items. */
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/* /*
...@@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, ...@@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{ {
kvm_account_pgtable_pages((void *)sp->spt, +1); kvm_account_pgtable_pages((void *)sp->spt, +1);
atomic64_inc(&kvm->arch.tdp_mmu_pages);
} }
static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{ {
kvm_account_pgtable_pages((void *)sp->spt, -1); kvm_account_pgtable_pages((void *)sp->spt, -1);
atomic64_dec(&kvm->arch.tdp_mmu_pages);
} }
/** /**
...@@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, ...@@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
bool shared) bool shared)
{ {
tdp_unaccount_mmu_page(kvm, sp); tdp_unaccount_mmu_page(kvm, sp);
if (!sp->nx_huge_page_disallowed)
return;
if (shared) if (shared)
spin_lock(&kvm->arch.tdp_mmu_pages_lock); spin_lock(&kvm->arch.tdp_mmu_pages_lock);
else else
lockdep_assert_held_write(&kvm->mmu_lock); lockdep_assert_held_write(&kvm->mmu_lock);
list_del(&sp->link); sp->nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
if (sp->nx_huge_page_disallowed) {
sp->nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
}
if (shared) if (shared)
spin_unlock(&kvm->arch.tdp_mmu_pages_lock); spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
...@@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, ...@@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
tdp_mmu_set_spte(kvm, iter, spte); tdp_mmu_set_spte(kvm, iter, spte);
} }
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
tdp_account_mmu_page(kvm, sp); tdp_account_mmu_page(kvm, sp);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment