Commit a8ac499b authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Don't require refcounted "struct page" to create huge SPTEs

Drop the requirement that a pfn be backed by a refcounted, compound, or
ZONE_DEVICE struct page, and instead rely solely on the host page
tables to identify huge pages.  The PageCompound() check is a remnant
of an old implementation that identified (well, attempted to identify)
huge pages without walking the host page tables.  The ZONE_DEVICE check
was added as an exception to the PageCompound() requirement.  In other
words, neither check is actually a hard requirement: if the primary MMU
has the pfn backed with a huge page, then KVM can back the pfn with a
huge page regardless of the backing store.
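
For illustration only (not part of this patch), a simplified sketch of
what "rely solely on the host page tables" means: the mapping level is
derived by walking the host page tables for the hva, not by inspecting
the struct page.  The real host_pfn_mapping_level() additionally
disables IRQs and snapshots each entry with READ_ONCE() so the lockless
walk is safe; the helper below is a hypothetical, stripped-down version
of the same idea.

/* Hypothetical, simplified sketch; not the in-tree implementation. */
static int example_host_mapping_level(struct mm_struct *mm, unsigned long hva)
{
	pgd_t *pgd = pgd_offset(mm, hva);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return PG_LEVEL_4K;

	p4d = p4d_offset(pgd, hva);
	if (!p4d_present(*p4d))
		return PG_LEVEL_4K;

	pud = pud_offset(p4d, hva);
	if (!pud_present(*pud))
		return PG_LEVEL_4K;
	if (pud_large(*pud))		/* host maps a 1GiB page */
		return PG_LEVEL_1G;

	pmd = pmd_offset(pud, hva);
	if (!pmd_present(*pmd))
		return PG_LEVEL_4K;
	if (pmd_large(*pmd))		/* host maps a 2MiB page */
		return PG_LEVEL_2M;

	return PG_LEVEL_4K;
}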

Dropping the @pfn parameter will also allow KVM to query the max host
mapping level without having to first get the pfn, which is
advantageous for use outside of the page fault path where KVM wants to
take action if and only if a page can be mapped huge, i.e. it avoids
the pfn lookup for gfns that can't be backed with a huge page.
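
As a hypothetical example of the resulting call pattern (illustrative
only, not code from this patch), a caller outside the page fault path
can now check the attainable level straight from the memslot and gfn
before bothering to resolve a pfn:

	/*
	 * Hypothetical caller: with the @pfn parameter gone, the level can
	 * be queried from just the memslot and gfn, so no pfn lookup (e.g.
	 * via spte_to_pfn() or a gfn->pfn translation) is needed up front.
	 */
	if (kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM) > PG_LEVEL_4K) {
		/* Only now is it worth doing pfn-based work on this mapping. */
	}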

Cc: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20220715232107.3775620-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent d5e90a69
arch/x86/kvm/mmu/mmu.c

@@ -2920,11 +2920,10 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 	__direct_pte_prefetch(vcpu, sp, sptep);
 }
 
-static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
 				  const struct kvm_memory_slot *slot)
 {
 	int level = PG_LEVEL_4K;
-	struct page *page;
 	unsigned long hva;
 	unsigned long flags;
 	pgd_t pgd;
@@ -2932,17 +2931,6 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 	pud_t pud;
 	pmd_t pmd;
 
-	/*
-	 * Note, @slot must be non-NULL, i.e. the caller is responsible for
-	 * ensuring @pfn isn't garbage and is backed by a memslot.
-	 */
-	page = kvm_pfn_to_refcounted_page(pfn);
-	if (!page)
-		return PG_LEVEL_4K;
-
-	if (!PageCompound(page) && !kvm_is_zone_device_page(page))
-		return PG_LEVEL_4K;
-
 	/*
 	 * Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
 	 * is not solely for performance, it's also necessary to avoid the
@@ -2995,7 +2983,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 
 int kvm_mmu_max_mapping_level(struct kvm *kvm,
 			      const struct kvm_memory_slot *slot, gfn_t gfn,
-			      kvm_pfn_t pfn, int max_level)
+			      int max_level)
 {
 	struct kvm_lpage_info *linfo;
 	int host_level;
@@ -3010,7 +2998,7 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 	if (max_level == PG_LEVEL_4K)
 		return PG_LEVEL_4K;
 
-	host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
+	host_level = host_pfn_mapping_level(kvm, gfn, slot);
 	return min(host_level, max_level);
 }
 
@@ -3035,8 +3023,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	 * level, which will be used to do precise, accurate accounting.
 	 */
 	fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
-						     fault->gfn, fault->pfn,
-						     fault->max_level);
+						     fault->gfn, fault->max_level);
 	if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
 		return;
 
@@ -6418,7 +6405,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 	 */
 	if (sp->role.direct &&
 	    sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
-							       pfn, PG_LEVEL_NUM)) {
+							       PG_LEVEL_NUM)) {
 		kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
 
 		if (kvm_available_flush_tlb_with_range())
arch/x86/kvm/mmu/mmu_internal.h

@@ -309,7 +309,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 
 int kvm_mmu_max_mapping_level(struct kvm *kvm,
 			      const struct kvm_memory_slot *slot, gfn_t gfn,
-			      kvm_pfn_t pfn, int max_level);
+			      int max_level);
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
arch/x86/kvm/mmu/tdp_mmu.c

@@ -1733,7 +1733,6 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 	gfn_t end = start + slot->npages;
 	struct tdp_iter iter;
 	int max_mapping_level;
-	kvm_pfn_t pfn;
 
 	rcu_read_lock();
 
@@ -1745,13 +1744,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 		    !is_last_spte(iter.old_spte, iter.level))
 			continue;
 
-		/*
-		 * This is a leaf SPTE. Check if the PFN it maps can
-		 * be mapped at a higher level.
-		 */
-		pfn = spte_to_pfn(iter.old_spte);
-		max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
-							      iter.gfn, pfn, PG_LEVEL_NUM);
+		max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+							      iter.gfn, PG_LEVEL_NUM);
 
 		WARN_ON(max_mapping_level < iter.level);