Commit a30f47cb authored by Xiao Guangrong, committed by Avi Kivity

KVM: MMU: improve write flooding detection

Detecting write-flooding does not work well today: when we handle a page write, we
treat the page as write-flooded if the last speculative spte has not been accessed.
However, speculative sptes are installed on many paths (pte prefetch, page sync, and
so on), so the last speculative spte may not point to the written page at all, and the
written page may still be accessed through other sptes. Depending on the Accessed bit
of the last speculative spte is therefore not enough.

Instead of detecting whether the page was accessed, detect whether the spte is
accessed after it is written: if the spte keeps being written but is never accessed,
treat the page as not being a page table, or as not having been used for a long time.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent 5d9ca30e
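For orientation before the diff: the heart of the new scheme is a per-shadow-page
counter, write_flooding_count, cleared whenever the page is actually used for a
shadow-page-table walk and incremented on every emulated write to it; three writes
with no intervening use mark the page as flooded so it gets zapped. The stand-alone
sketch below is not kernel code: the struct name, the page_used()/page_written()
helpers and the main() driver are simplified stand-ins, and only the field name and
the threshold of 3 mirror the patch.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct kvm_mmu_page: only the fields used here. */
struct mmu_page {
	int level;                 /* shadow page level; level-1 pages are never flagged */
	int write_flooding_count;  /* writes seen since the page was last used */
};

/* Models clear_sp_write_flooding_count(): the page was just used in a walk. */
static void page_used(struct mmu_page *sp)
{
	sp->write_flooding_count = 0;
}

/* Models detect_write_flooding(): called for each emulated write to the page. */
static bool page_written(struct mmu_page *sp)
{
	if (sp->level == 1)
		return false;      /* level-1 pages can be unsync, so skip them */

	return ++sp->write_flooding_count >= 3;
}

int main(void)
{
	struct mmu_page sp = { .level = 2 };

	page_written(&sp);
	page_written(&sp);
	page_used(&sp);                     /* a walk resets the counter ...       */
	printf("%d\n", page_written(&sp));  /* ... so this prints 0 (not flooded)  */
	page_written(&sp);
	printf("%d\n", page_written(&sp));  /* third write without a use: prints 1 */
	return 0;
}

Compared with the old per-vcpu last_pt_write_gfn/last_pte_updated tracking, keeping
the counter in the shadow page itself means detection no longer depends on which
spte happened to be speculatively updated last.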
@@ -239,6 +239,8 @@ struct kvm_mmu_page {
 	int clear_spte_count;
 #endif

+	int write_flooding_count;
+
 	struct rcu_head rcu;
 };
@@ -353,10 +355,6 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;

-	gfn_t last_pt_write_gfn;
-	int last_pt_write_count;
-	u64 *last_pte_updated;
-
 	struct fpu guest_fpu;
 	u64 xcr0;
...
@@ -1653,6 +1653,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
 		sp->spt[i] = 0ull;
 }

+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+	sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+	__clear_sp_write_flooding_count(sp);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1696,6 +1708,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);

+		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
@@ -1848,15 +1861,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 	mmu_page_remove_parent_pte(sp, parent_pte);
 }

-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.last_pte_updated = NULL;
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
@@ -1916,7 +1920,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	}

 	sp->role.invalid = 1;
-	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
@@ -2361,8 +2364,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 	}
 	kvm_release_pfn_clean(pfn);
-	if (speculative)
-		vcpu->arch.last_pte_updated = sptep;
 }

 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -3523,13 +3524,6 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
 		kvm_mmu_flush_tlb(vcpu);
 }

-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
-{
-	u64 *spte = vcpu->arch.last_pte_updated;
-
-	return !!(spte && (*spte & shadow_accessed_mask));
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    const u8 *new, int *bytes)
 {
@@ -3570,22 +3564,16 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
  * If we're seeing too many writes to a page, it may no longer be a page table,
  * or we may be forking, in which case it is better to unmap the page.
  */
-static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
 {
-	bool flooded = false;
-
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = true;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
-	}
+	/*
+	 * Skip write-flooding detected for the sp whose level is 1, because
+	 * it can become unsync, then the guest page is not write-protected.
+	 */
+	if (sp->role.level == 1)
+		return false;

-	return flooded;
+	return ++sp->write_flooding_count >= 3;
 }

 /*
@@ -3657,7 +3645,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
-	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+	bool remote_flush, local_flush, zap_page;

 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -3683,12 +3671,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	++vcpu->kvm->stat.mmu_pte_write;
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);

-	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		misaligned = detect_write_misaligned(sp, gpa, bytes);
+		spte = get_written_sptes(sp, gpa, &npte);

-		if (misaligned || flooded) {
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp, spte)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
...
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t table_gfn;

+		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);

 		sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t direct_gfn;

+		clear_sp_write_flooding_count(it.sptep);
 		validate_direct_spte(vcpu, it.sptep, direct_access);

 		drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(it.sptep, sp);
 	}

+	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
 		     user_fault, write_fault, emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		if (!prefault) {
+		if (!prefault)
 			inject_page_fault(vcpu, &walker.fault);
-			/* reset fork detector */
-			vcpu->arch.last_pt_write_count = 0;
-		}
+
 		return 0;
 	}
@@ -641,9 +642,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
 		 sptep, *sptep, emulate);

-	if (!emulate)
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
 	++vcpu->stat.pf_fixed;
 	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);
...