Commit 58c5c276 authored by Paul Mackerras's avatar Paul Mackerras

KVM: PPC: Book3S HV: Handle 1GB pages in radix page fault handler

This adds code to the radix hypervisor page fault handler to handle the
case where the guest memory is backed by 1GB hugepages, and put them
into the partition-scoped radix tree at the PUD level.  The code is
essentially analogous to the code for 2MB pages.  This also rearranges
kvmppc_create_pte() to make it easier to follow.
Signed-off-by: default avatarPaul Mackerras <paulus@ozlabs.org>
parent f7caf712
......@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
{
int psize = MMU_BASE_PSIZE;
if (pshift >= PMD_SHIFT)
if (pshift >= PUD_SHIFT)
psize = MMU_PAGE_1G;
else if (pshift >= PMD_SHIFT)
psize = MMU_PAGE_2M;
addr &= ~0xfffUL;
addr |= mmu_psize_defs[psize].ap << 5;
......@@ -231,9 +233,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pud = pud_alloc_one(kvm->mm, gpa);
pmd = NULL;
if (pud && pud_present(*pud))
if (pud && pud_present(*pud) && !pud_huge(*pud))
pmd = pmd_offset(pud, gpa);
else
else if (level <= 1)
new_pmd = pmd_alloc_one(kvm->mm, gpa);
if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
......@@ -254,6 +256,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pud = NULL;
}
pud = pud_offset(pgd, gpa);
if (pud_huge(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
/*
* If we raced with another CPU which has just put
* a 1GB pte in after we saw a pmd page, try again.
*/
if (level <= 1 && !new_pmd) {
ret = -EAGAIN;
goto out_unlock;
}
/* Check if we raced and someone else has set the same thing */
if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
ret = 0;
goto out_unlock;
}
/* Valid 1GB page here already, remove it */
old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
~0UL, 0, hgpa, PUD_SHIFT);
kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
if (old & _PAGE_DIRTY) {
unsigned long gfn = hgpa >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = gfn_to_memslot(kvm, gfn);
if (memslot && memslot->dirty_bitmap)
kvmppc_update_dirty_map(memslot,
gfn, PUD_SIZE);
}
}
if (level == 2) {
if (!pud_none(*pud)) {
/*
* There's a page table page here, but we wanted to
* install a large page, so remove and free the page
* table page. new_pmd will be NULL since level == 2.
*/
new_pmd = pmd_offset(pud, 0);
pud_clear(pud);
kvmppc_radix_flush_pwc(kvm, gpa);
}
kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
ret = 0;
goto out_unlock;
}
if (pud_none(*pud)) {
if (!new_pmd)
goto out_unlock;
......@@ -289,41 +335,43 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
kvmppc_update_dirty_map(memslot,
gfn, PMD_SIZE);
}
} else if (level == 1 && !pmd_none(*pmd)) {
/*
* There's a page table page here, but we wanted to
* install a large page, so remove and free the page
* table page. new_ptep will be NULL since level == 1.
*/
new_ptep = pte_offset_kernel(pmd, 0);
pmd_clear(pmd);
kvmppc_radix_flush_pwc(kvm, gpa);
}
if (level == 0) {
if (pmd_none(*pmd)) {
if (!new_ptep)
goto out_unlock;
pmd_populate(kvm->mm, pmd, new_ptep);
new_ptep = NULL;
}
ptep = pte_offset_kernel(pmd, gpa);
if (pte_present(*ptep)) {
/* Check if someone else set the same thing */
if (pte_raw(*ptep) == pte_raw(pte)) {
ret = 0;
goto out_unlock;
}
/* PTE was previously valid, so invalidate it */
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
0, gpa, 0);
kvmppc_radix_tlbie_page(kvm, gpa, 0);
if (old & _PAGE_DIRTY)
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
if (level == 1) {
if (!pmd_none(*pmd)) {
/*
* There's a page table page here, but we wanted to
* install a large page, so remove and free the page
* table page. new_ptep will be NULL since level == 1.
*/
new_ptep = pte_offset_kernel(pmd, 0);
pmd_clear(pmd);
kvmppc_radix_flush_pwc(kvm, gpa);
}
kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
} else {
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
ret = 0;
goto out_unlock;
}
if (pmd_none(*pmd)) {
if (!new_ptep)
goto out_unlock;
pmd_populate(kvm->mm, pmd, new_ptep);
new_ptep = NULL;
}
ptep = pte_offset_kernel(pmd, gpa);
if (pte_present(*ptep)) {
/* Check if someone else set the same thing */
if (pte_raw(*ptep) == pte_raw(pte)) {
ret = 0;
goto out_unlock;
}
/* PTE was previously valid, so invalidate it */
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
0, gpa, 0);
kvmppc_radix_tlbie_page(kvm, gpa, 0);
if (old & _PAGE_DIRTY)
mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
}
kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
ret = 0;
out_unlock:
......@@ -446,8 +494,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pfn = page_to_pfn(page);
if (PageCompound(page)) {
pte_size <<= compound_order(compound_head(page));
/* See if we can insert a 2MB large-page PTE here */
if (pte_size >= PMD_SIZE &&
/* See if we can insert a 1GB or 2MB large PTE here */
if (pte_size >= PUD_SIZE &&
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
(hva & (PUD_SIZE - PAGE_SIZE))) {
level = 2;
pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
} else if (pte_size >= PMD_SIZE &&
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
(hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
......@@ -657,6 +710,10 @@ void kvmppc_free_radix(struct kvm *kvm)
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
if (!pud_present(*pud))
continue;
if (pud_huge(*pud)) {
pud_clear(pud);
continue;
}
pmd = pmd_offset(pud, 0);
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
if (pmd_is_leaf(*pmd)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment