Commit 27aa8962 authored by Radim Krčmář

Merge tag 'kvm-ppc-next-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

KVM PPC update for 4.17

- Improvements for the radix page fault handler for HV KVM on POWER9.
parents f497b6c2 31c8b0d0
@@ -60,7 +60,6 @@
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
			       unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
...
@@ -295,7 +295,6 @@ struct kvmppc_ops {
			       const struct kvm_userspace_memory_region *mem,
			       const struct kvm_memory_slot *old,
			       const struct kvm_memory_slot *new);
-	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
			       unsigned long end);
	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
...
@@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
 }

-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
...
@@ -14,7 +14,6 @@
 extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
			struct kvm_memory_slot *memslot);
-extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
			unsigned long end);
 extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
...
@@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	return 0;
 }

-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
-{
-	hva_handler_fn handler;
-
-	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
-	kvm_handle_hva(kvm, hva, handler);
-	return 0;
-}
-
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	hva_handler_fn handler;
...
@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 {
 	int psize = MMU_BASE_PSIZE;

-	if (pshift >= PMD_SHIFT)
+	if (pshift >= PUD_SHIFT)
+		psize = MMU_PAGE_1G;
+	else if (pshift >= PMD_SHIFT)
 		psize = MMU_PAGE_2M;
 	addr &= ~0xfffUL;
 	addr |= mmu_psize_defs[psize].ap << 5;
@@ -160,6 +162,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	asm volatile("ptesync": : :"memory");
 }

+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+{
+	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+	asm volatile("ptesync": : :"memory");
+	/* RIC=1 PRS=0 R=1 IS=2 */
+	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
				      unsigned long clr, unsigned long set,
				      unsigned long addr, unsigned int shift)
@@ -195,6 +208,12 @@ static void kvmppc_pte_free(pte_t *ptep)
 	kmem_cache_free(kvm_pte_cache, ptep);
 }

+/* Like pmd_huge() and pmd_large(), but works regardless of config options */
+static inline int pmd_is_leaf(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PTE);
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
			     unsigned int level, unsigned long mmu_seq)
 {
@@ -214,12 +233,12 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = pud_alloc_one(kvm->mm, gpa);

 	pmd = NULL;
-	if (pud && pud_present(*pud))
+	if (pud && pud_present(*pud) && !pud_huge(*pud))
 		pmd = pmd_offset(pud, gpa);
-	else
+	else if (level <= 1)
 		new_pmd = pmd_alloc_one(kvm->mm, gpa);

-	if (level == 0 && !(pmd && pmd_present(*pmd)))
+	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
 		new_ptep = kvmppc_pte_alloc();

 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -237,6 +256,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = NULL;
 	}
 	pud = pud_offset(pgd, gpa);
+	if (pud_huge(*pud)) {
+		unsigned long hgpa = gpa & PUD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 1GB pte in after we saw a pmd page, try again.
+		 */
+		if (level <= 1 && !new_pmd) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid 1GB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+					      ~0UL, 0, hgpa, PUD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = hgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PUD_SIZE);
+		}
+	}
+	if (level == 2) {
+		if (!pud_none(*pud)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_pmd will be NULL since level == 2.
+			 */
+			new_pmd = pmd_offset(pud, 0);
+			pud_clear(pud);
+			kvmppc_radix_flush_pwc(kvm, gpa);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+		ret = 0;
+		goto out_unlock;
+	}
 	if (pud_none(*pud)) {
 		if (!new_pmd)
 			goto out_unlock;
@@ -244,40 +307,71 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pmd = NULL;
 	}
 	pmd = pmd_offset(pud, gpa);
-	if (pmd_large(*pmd)) {
-		/* Someone else has instantiated a large page here; retry */
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
-	if (level == 1 && !pmd_none(*pmd)) {
-		/*
-		 * There's a page table page here, but we wanted
-		 * to install a large page.  Tell the caller and let
-		 * it try installing a normal page if it wants.
-		 */
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-	if (level == 0) {
-		if (pmd_none(*pmd)) {
-			if (!new_ptep)
-				goto out_unlock;
-			pmd_populate(kvm->mm, pmd, new_ptep);
-			new_ptep = NULL;
-		}
-		ptep = pte_offset_kernel(pmd, gpa);
-		if (pte_present(*ptep)) {
-			/* PTE was previously valid, so invalidate it */
-			old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-						      0, gpa, 0);
-			kvmppc_radix_tlbie_page(kvm, gpa, 0);
-			if (old & _PAGE_DIRTY)
-				mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
-		}
-		kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
-	} else {
-		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+	if (pmd_is_leaf(*pmd)) {
+		unsigned long lgpa = gpa & PMD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 2MB pte in after we saw a pte page, try again.
+		 */
+		if (level == 0 && !new_ptep) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid 2MB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+					      ~0UL, 0, lgpa, PMD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = lgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PMD_SIZE);
+		}
+	}
+	if (level == 1) {
+		if (!pmd_none(*pmd)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_ptep will be NULL since level == 1.
+			 */
+			new_ptep = pte_offset_kernel(pmd, 0);
+			pmd_clear(pmd);
+			kvmppc_radix_flush_pwc(kvm, gpa);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+		ret = 0;
+		goto out_unlock;
+	}
+	if (pmd_none(*pmd)) {
+		if (!new_ptep)
+			goto out_unlock;
+		pmd_populate(kvm->mm, pmd, new_ptep);
+		new_ptep = NULL;
+	}
+	ptep = pte_offset_kernel(pmd, gpa);
+	if (pte_present(*ptep)) {
+		/* Check if someone else set the same thing */
+		if (pte_raw(*ptep) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* PTE was previously valid, so invalidate it */
+		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+					      0, gpa, 0);
+		kvmppc_radix_tlbie_page(kvm, gpa, 0);
+		if (old & _PAGE_DIRTY)
+			mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
 	}
+	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
 	ret = 0;

  out_unlock:
@@ -298,11 +392,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long mmu_seq, pte_size;
 	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
-	struct page *page = NULL, *pages[1];
-	long ret, npages, ok;
-	unsigned int writing;
-	struct vm_area_struct *vma;
-	unsigned long flags;
+	struct page *page = NULL;
+	long ret;
+	bool writing;
+	bool upgrade_write = false;
+	bool *upgrade_p = &upgrade_write;
 	pte_t pte, *ptep;
 	unsigned long pgflags;
 	unsigned int shift, level;
@@ -342,135 +436,137 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
					   dsisr & DSISR_ISSTORE);
 	}

-	/* used to check for invalidations in progress */
-	mmu_seq = kvm->mmu_notifier_seq;
-	smp_rmb();
-
 	writing = (dsisr & DSISR_ISSTORE) != 0;
-	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* give the guest a DSI */
+			dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
+			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+			return RESUME_GUEST;
+		}
+		upgrade_p = NULL;
+	}

 	if (dsisr & DSISR_SET_RC) {
 		/*
 		 * Need to set an R or C bit in the 2nd-level tables;
-		 * if the relevant bits aren't already set in the linux
-		 * page tables, fall through to do the gup_fast to
-		 * set them in the linux page tables too.
+		 * since we are just helping out the hardware here,
+		 * it is sufficient to do what the hardware does.
 		 */
-		ok = 0;
 		pgflags = _PAGE_ACCESSED;
 		if (writing)
 			pgflags |= _PAGE_DIRTY;
-		local_irq_save(flags);
-		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
-		if (ptep) {
-			pte = READ_ONCE(*ptep);
-			if (pte_present(pte) &&
-			    (pte_val(pte) & pgflags) == pgflags)
-				ok = 1;
-		}
-		local_irq_restore(flags);
-		if (ok) {
-			spin_lock(&kvm->mmu_lock);
-			if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			/*
-			 * We are walking the secondary page table here. We can do this
-			 * without disabling irq.
-			 */
-			ptep = __find_linux_pte(kvm->arch.pgtable,
-						gpa, NULL, &shift);
-			if (ptep && pte_present(*ptep)) {
-				kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
-							gpa, shift);
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			spin_unlock(&kvm->mmu_lock);
-		}
+		/*
+		 * We are walking the secondary page table here. We can do this
+		 * without disabling irq.
+		 */
+		spin_lock(&kvm->mmu_lock);
+		ptep = __find_linux_pte(kvm->arch.pgtable,
+					gpa, NULL, &shift);
+		if (ptep && pte_present(*ptep) &&
+		    (!writing || pte_write(*ptep))) {
+			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+						gpa, shift);
+			dsisr &= ~DSISR_SET_RC;
+		}
+		spin_unlock(&kvm->mmu_lock);
+
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
 	}

-	ret = -EFAULT;
-	pfn = 0;
-	pte_size = PAGE_SIZE;
-	pgflags = _PAGE_READ | _PAGE_EXEC;
-	level = 0;
-	npages = get_user_pages_fast(hva, 1, writing, pages);
-	if (npages < 1) {
-		/* Check if it's an I/O mapping */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, hva);
-		if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
-		    (vma->vm_flags & VM_PFNMAP)) {
-			pfn = vma->vm_pgoff +
-				((hva - vma->vm_start) >> PAGE_SHIFT);
-			pgflags = pgprot_val(vma->vm_page_prot);
-		}
-		up_read(&current->mm->mmap_sem);
-		if (!pfn)
-			return -EFAULT;
-	} else {
-		page = pages[0];
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
 		pfn = page_to_pfn(page);
-		if (PageHuge(page)) {
-			page = compound_head(page);
-			pte_size <<= compound_order(page);
-			/* See if we can insert a 2MB large-page PTE here */
-			if (pte_size >= PMD_SIZE &&
-			    (gpa & PMD_MASK & PAGE_MASK) ==
-			    (hva & PMD_MASK & PAGE_MASK)) {
-				level = 1;
-				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-			}
-		}
-		/* See if we can provide write access */
-		if (writing) {
-			/*
-			 * We assume gup_fast has set dirty on the host PTE.
-			 */
-			pgflags |= _PAGE_WRITE;
-		} else {
-			local_irq_save(flags);
-			ptep = find_current_mm_pte(current->mm->pgd,
						   hva, NULL, NULL);
-			if (ptep && pte_write(*ptep) && pte_dirty(*ptep))
-				pgflags |= _PAGE_WRITE;
-			local_irq_restore(flags);
-		}
-	}
+		upgrade_write = true;
+	} else {
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+					   writing, upgrade_p);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
+		}
+	}
+
+	/* See if we can insert a 1GB or 2MB large PTE here */
+	level = 0;
+	if (page && PageCompound(page)) {
+		pte_size = PAGE_SIZE << compound_order(compound_head(page));
+		if (pte_size >= PUD_SIZE &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+		} else if (pte_size >= PMD_SIZE &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
+		}
+	}

 	/*
 	 * Compute the PTE value that we need to insert.
 	 */
-	pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
-	if (pgflags & _PAGE_WRITE)
-		pgflags |= _PAGE_DIRTY;
-	pte = pfn_pte(pfn, __pgprot(pgflags));
-
-	/* Allocate space in the tree and write the PTE */
-	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	if (ret == -EBUSY) {
-		/*
-		 * There's already a PMD where wanted to install a large page;
-		 * for now, fall back to installing a small page.
-		 */
-		level = 0;
-		pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
-		pte = pfn_pte(pfn, __pgprot(pgflags));
-		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	}
-
-	if (ret == 0 || ret == -EAGAIN)
-		ret = RESUME_GUEST;
-
-	if (page) {
-		/*
-		 * We drop pages[0] here, not page because page might
-		 * have been set to the head page of a compound, but
-		 * we have to drop the reference on the correct tail
-		 * page to match the get inside gup()
-		 */
-		put_page(pages[0]);
-	}
+	if (page) {
+		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+			_PAGE_ACCESSED;
+		if (writing || upgrade_write)
+			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+		pte = pfn_pte(pfn, __pgprot(pgflags));
+	} else {
+		/*
+		 * Read the PTE from the process' radix tree and use that
+		 * so we get the attribute bits.
+		 */
+		local_irq_disable();
+		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+		pte = *ptep;
+		local_irq_enable();
+		if (shift == PUD_SHIFT &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+		} else if (shift == PMD_SHIFT &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+		} else if (shift && shift != PAGE_SHIFT) {
+			/* Adjust PFN */
+			unsigned long mask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & mask));
+		}
+		if (!(writing || upgrade_write))
+			pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
+		pte = __pte(pte_val(pte) | _PAGE_EXEC);
+	}
+
+	/* Allocate space in the tree and write the PTE */
+	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+	if (page) {
+		if (!ret && (pte_val(pte) & _PAGE_WRITE))
+			set_page_dirty_lock(page);
+		put_page(page);
+	}
+
+	if (ret == 0 || ret == -EAGAIN)
+		ret = RESUME_GUEST;
 	return ret;
 }
@@ -642,9 +738,13 @@ void kvmppc_free_radix(struct kvm *kvm)
 	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
 		if (!pud_present(*pud))
 			continue;
+		if (pud_huge(*pud)) {
+			pud_clear(pud);
+			continue;
+		}
 		pmd = pmd_offset(pud, 0);
 		for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-			if (pmd_huge(*pmd)) {
+			if (pmd_is_leaf(*pmd)) {
 				pmd_clear(pmd);
 				continue;
 			}
...
@@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 		/*
 		 * Synchronize with the MMU notifier callbacks in
-		 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+		 * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
 		 * While we have the rmap lock, code running on other CPUs
 		 * cannot finish unmapping the host real page that backs
 		 * this guest real page, so we are OK to access the host
...
@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	trace_hardirqs_on();

-	guest_enter();
+	guest_enter_irqoff();

 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);

-	guest_exit();
-
 	trace_hardirqs_off();

 	set_irq_happened(trap);
@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvmppc_set_host_core(pcpu);
 	local_irq_enable();
+	guest_exit();

 	/* Let secondaries go back to the offline loop */
 	for (i = 0; i < controlled_threads; ++i) {
@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		goto up_out;

 	psize = vma_kernel_pagesize(vma);
-	porder = __ilog2(psize);

 	up_read(&current->mm->mmap_sem);

 	/* We can handle 4k, 64k or 16M pages in the VRMA */
-	err = -EINVAL;
-	if (!(psize == 0x1000 || psize == 0x10000 ||
-	      psize == 0x1000000))
-		goto out_srcu;
+	if (psize >= 0x1000000)
+		psize = 0x1000000;
+	else if (psize >= 0x10000)
+		psize = 0x10000;
+	else
+		psize = 0x1000;
+	porder = __ilog2(psize);

 	senc = slb_pgsize_encoding(psize);
 	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
@@ -4350,7 +4351,6 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.flush_memslot = kvmppc_core_flush_memslot_hv,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
 	.commit_memory_region = kvmppc_core_commit_memory_region_hv,
-	.unmap_hva = kvm_unmap_hva_hv,
 	.unmap_hva_range = kvm_unmap_hva_range_hv,
 	.age_hva = kvm_age_hva_hv,
 	.test_age_hva = kvm_test_age_hva_hv,
...
@@ -320,7 +320,6 @@ kvm_novcpu_exit:
 	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_commence_exit
 	nop
-	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	kvmhv_switch_to_host

/*
@@ -1220,6 +1219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
secondary_too_late:
 	li	r12, 0
+	stw	r12, STACK_SLOT_TRAP(r1)
 	cmpdi	r4, 0
 	beq	11f
 	stw	r12, VCPU_TRAP(r4)
@@ -1558,12 +1558,12 @@ mc_cont:
3:	stw	r5,VCPU_SLB_MAX(r9)

guest_bypass:
+	stw	r12, STACK_SLOT_TRAP(r1)
 	mr	r3, r12
 	/* Increment exit count, poke other threads to exit */
 	bl	kvmhv_commence_exit
 	nop
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	lwz	r12, VCPU_TRAP(r9)

 	/* Stop others sending VCPU interrupts to this physical CPU */
 	li	r0, -1
@@ -1898,6 +1898,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
 * POWER7/POWER8 guest -> host partition switch code.
 * We don't have to lock against tlbies but we do
 * have to coordinate the hardware threads.
+ * Here STACK_SLOT_TRAP(r1) contains the trap number.
 */
kvmhv_switch_to_host:
 	/* Secondary threads wait for primary to do partition switch */
@@ -1950,12 +1951,12 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

 	/* If HMI, call kvmppc_realmode_hmi_handler() */
+	lwz	r12, STACK_SLOT_TRAP(r1)
 	cmpwi	r12, BOOK3S_INTERRUPT_HMI
 	bne	27f
 	bl	kvmppc_realmode_hmi_handler
 	nop
 	cmpdi	r3, 0
-	li	r12, BOOK3S_INTERRUPT_HMI
 	/*
 	 * At this point kvmppc_realmode_hmi_handler may have resync-ed
 	 * the TB, and if it has, we must not subtract the guest timebase
@@ -2008,10 +2009,8 @@ BEGIN_FTR_SECTION
 	lwz	r8, KVM_SPLIT_DO_RESTORE(r3)
 	cmpwi	r8, 0
 	beq	47f
-	stw	r12, STACK_SLOT_TRAP(r1)
 	bl	kvmhv_p9_restore_lpcr
 	nop
-	lwz	r12, STACK_SLOT_TRAP(r1)
 	b	48f
47:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
@@ -2049,6 +2048,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)

+	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
 	ld	r0, SFS+PPC_LR_STKOFF(r1)
 	addi	r1, r1, SFS
 	mtlr	r0
...
@@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
 	}
 }

-static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
-{
-	trace_kvm_unmap_hva(hva);
-
-	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
-
-	return 0;
-}
-
 static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
				  unsigned long end)
 {
@@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.flush_memslot = kvmppc_core_flush_memslot_pr,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
 	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
-	.unmap_hva = kvm_unmap_hva_pr,
 	.unmap_hva_range = kvm_unmap_hva_range_pr,
 	.age_hva = kvm_age_hva_pr,
 	.test_age_hva = kvm_test_age_hva_pr,
...
@@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,

/************* MMU Notifiers *************/

-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	trace_kvm_unmap_hva(hva);
...
@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
 int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
		unsigned int rt, int is_default_endian)
 {
-	enum emulation_result emulated;
+	enum emulation_result emulated = EMULATE_DONE;

 	while (vcpu->arch.mmio_vmx_copy_nums) {
 		emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)

 	kvm_sigset_deactivate(vcpu);

+#ifdef CONFIG_ALTIVEC
 out:
+#endif
 	vcpu_put(vcpu);
 	return r;
 }
...
@@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit,
	)
 );

-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva	)
-	),
-
-	TP_fast_assign(
-		__entry->hva = hva;
-	),
-
-	TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
 #endif /* _TRACE_KVM_H */

 /* This part must be outside protection */
...