Commit 08e850c6 authored by Avi Kivity's avatar Avi Kivity

KVM: MMU: Reinstate pte prefetch on invlpg

Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:

  "The processor may create entries in paging-structure caches for
   translations required for prefetches and for accesses that are a
   result of speculative execution that would never actually occur
   in the executed code path."

And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so an older invlpg could install the pte over
a newer invlpg.

Reinstate the prefetch logic, but this time note whether another invlpg has
executed using a counter.  If a race occured, do not install the pte.
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
parent fbc5d139
...@@ -389,6 +389,7 @@ struct kvm_arch { ...@@ -389,6 +389,7 @@ struct kvm_arch {
unsigned int n_free_mmu_pages; unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages; unsigned int n_requested_mmu_pages;
unsigned int n_alloc_mmu_pages; unsigned int n_alloc_mmu_pages;
atomic_t invlpg_counter;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/* /*
* Hash table of struct kvm_mmu_page. * Hash table of struct kvm_mmu_page.
......
...@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int flooded = 0; int flooded = 0;
int npte; int npte;
int r; int r;
int invlpg_counter;
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
switch (bytes) { invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
}
/* /*
* Assume that the pte write on a page table of the same type * Assume that the pte write on a page table of the same type
...@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
* (might be false while changing modes). Note it is verified later * (might be false while changing modes). Note it is verified later
* by update_pte(). * by update_pte().
*/ */
if (is_pae(vcpu) && bytes == 4) { if ((is_pae(vcpu) && bytes == 4) || !new) {
/* Handle a 32-bit guest writing two halves of a 64-bit gpte */ /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
if (is_pae(vcpu)) {
gpa &= ~(gpa_t)7; gpa &= ~(gpa_t)7;
r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8); bytes = 8;
}
r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
if (r) if (r)
gentry = 0; gentry = 0;
new = (const u8 *)&gentry;
}
switch (bytes) {
case 4:
gentry = *(const u32 *)new;
break;
case 8:
gentry = *(const u64 *)new;
break;
default:
gentry = 0;
break;
} }
mmu_guess_page_from_pte_write(vcpu, gpa, gentry); mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
kvm_mmu_access_page(vcpu, gfn); kvm_mmu_access_page(vcpu, gfn);
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write; ++vcpu->kvm->stat.mmu_pte_write;
......
...@@ -463,6 +463,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -463,6 +463,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{ {
struct kvm_shadow_walk_iterator iterator; struct kvm_shadow_walk_iterator iterator;
gpa_t pte_gpa = -1;
int level; int level;
u64 *sptep; u64 *sptep;
int need_flush = 0; int need_flush = 0;
...@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) ...@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (level == PT_PAGE_TABLE_LEVEL || if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
struct kvm_mmu_page *sp = page_header(__pa(sptep));
pte_gpa = (sp->gfn << PAGE_SHIFT);
pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
if (is_shadow_present_pte(*sptep)) { if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep); rmap_remove(vcpu->kvm, sptep);
...@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) ...@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (need_flush) if (need_flush)
kvm_flush_remote_tlbs(vcpu->kvm); kvm_flush_remote_tlbs(vcpu->kvm);
atomic_inc(&vcpu->kvm->arch.invlpg_counter);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
if (pte_gpa == -1)
return;
if (mmu_topup_memory_caches(vcpu))
return;
kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
} }
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment