Commit d78bca72 authored by Paul Mackerras, committed by Alexander Graf

KVM: PPC: Book3S PR: Use mmu_notifier_retry() in kvmppc_mmu_map_page()

When the MM code is invalidating a range of pages, it calls the KVM
kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
kvm_unmap_hva_range(), which arranges to flush all the existing host
HPTEs for guest pages.  However, the Linux PTEs for the range being
flushed are still valid at that point.  We are not supposed to establish
any new references to pages in the range until the ...range_end()
notifier gets called.  The PPC-specific KVM code doesn't get any
explicit notification of that; instead, we are supposed to use
mmu_notifier_retry() to test whether we are or have been inside a
range flush notifier pair while we have been getting a page and
instantiating a host HPTE for the page.

This therefore adds a call to mmu_notifier_retry inside
kvmppc_mmu_map_page().  This call is inside a region locked with
kvm->mmu_lock, which is the same lock that is taken by the KVM
MMU notifier functions, thus ensuring that no new notification can
proceed while we are in the locked region.  Inside this region we
also create the host HPTE and link the corresponding hpte_cache
structure into the lists used to find it later.  We cannot allocate
the hpte_cache structure inside this locked region because that can
lead to deadlock, so we allocate it outside the region and free it
if we end up not using it.

This also moves the updates of vcpu3s->hpte_cache_count inside the
regions locked with vcpu3s->mmu_lock, and does the increment in
kvmppc_mmu_hpte_cache_map() when the pte is added to the cache
rather than when it is allocated, in order that the hpte_cache_count
is accurate.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
parent 93b159b4
...@@ -142,6 +142,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, ...@@ -142,6 +142,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
......
...@@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, ...@@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
int r = 0; int r = 0;
int hpsize = MMU_PAGE_4K; int hpsize = MMU_PAGE_4K;
bool writable; bool writable;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct hpte_cache *cpte;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
/* Get host physical address for gpa */ /* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
...@@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, ...@@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M); hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
cpte = kvmppc_mmu_hpte_cache_next(vcpu);
spin_lock(&kvm->mmu_lock);
if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
r = -EAGAIN;
goto out_unlock;
}
map_again: map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
...@@ -150,7 +165,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, ...@@ -150,7 +165,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
if (attempt > 1) if (attempt > 1)
if (ppc_md.hpte_remove(hpteg) < 0) { if (ppc_md.hpte_remove(hpteg) < 0) {
r = -1; r = -1;
goto out; goto out_unlock;
} }
ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
...@@ -163,8 +178,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, ...@@ -163,8 +178,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
attempt++; attempt++;
goto map_again; goto map_again;
} else { } else {
struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
trace_kvm_book3s_64_mmu_map(rflags, hpteg, trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte); vpn, hpaddr, orig_pte);
...@@ -175,15 +188,21 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, ...@@ -175,15 +188,21 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
} }
pte->slot = hpteg + (ret & 7); cpte->slot = hpteg + (ret & 7);
pte->host_vpn = vpn; cpte->host_vpn = vpn;
pte->pte = *orig_pte; cpte->pte = *orig_pte;
pte->pfn = hpaddr >> PAGE_SHIFT; cpte->pfn = hpaddr >> PAGE_SHIFT;
pte->pagesize = hpsize; cpte->pagesize = hpsize;
kvmppc_mmu_hpte_cache_map(vcpu, pte); kvmppc_mmu_hpte_cache_map(vcpu, cpte);
cpte = NULL;
} }
out_unlock:
spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
if (cpte)
kvmppc_mmu_hpte_cache_free(cpte);
out: out:
return r; return r;
......
...@@ -98,6 +98,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) ...@@ -98,6 +98,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
&vcpu3s->hpte_hash_vpte_64k[index]); &vcpu3s->hpte_hash_vpte_64k[index]);
#endif #endif
vcpu3s->hpte_cache_count++;
spin_unlock(&vcpu3s->mmu_lock); spin_unlock(&vcpu3s->mmu_lock);
} }
...@@ -131,10 +133,10 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) ...@@ -131,10 +133,10 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
hlist_del_init_rcu(&pte->list_vpte_64k); hlist_del_init_rcu(&pte->list_vpte_64k);
#endif #endif
vcpu3s->hpte_cache_count--;
spin_unlock(&vcpu3s->mmu_lock); spin_unlock(&vcpu3s->mmu_lock);
vcpu3s->hpte_cache_count--;
call_rcu(&pte->rcu_head, free_pte_rcu); call_rcu(&pte->rcu_head, free_pte_rcu);
} }
...@@ -331,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) ...@@ -331,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte; struct hpte_cache *pte;
pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
vcpu3s->hpte_cache_count++;
if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
kvmppc_mmu_pte_flush_all(vcpu); kvmppc_mmu_pte_flush_all(vcpu);
pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
return pte; return pte;
} }
/*
 * Release an hpte_cache entry by handing it back to the hpte_cache
 * kmem cache with kmem_cache_free().  kvmppc_mmu_map_page() calls this
 * for an entry it obtained from kvmppc_mmu_hpte_cache_next() but did
 * not pass on to kvmppc_mmu_hpte_cache_map().
 */
void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
{
kmem_cache_free(hpte_cache, pte);
}
void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
{ {
kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_pte_flush(vcpu, 0, 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment