Commit 8dc6cca5 authored by Paul Mackerras

KVM: PPC: Book3S HV: Don't rely on host's page size information

This removes the dependence of KVM on the mmu_psize_defs array (which
stores information about hardware support for various page sizes) and
the things derived from it, chiefly hpte_page_sizes[], hpte_page_size(),
hpte_actual_page_size() and get_sllp_encoding().  We also no longer
rely on the mmu_slb_size variable or the MMU_FTR_1T_SEGMENT feature
bit.

The reason for doing this is so we can support a HPT guest on a radix
host.  In a radix host, the mmu_psize_defs array contains information
about page sizes supported by the MMU in radix mode rather than the
page sizes supported by the MMU in HPT mode.  Similarly, mmu_slb_size
and the MMU_FTR_1T_SEGMENT bit are not set.

Instead we hard-code knowledge of the behaviour of the HPT MMU in the
POWER7, POWER8 and POWER9 processors (which are the only processors
supported by HV KVM) - specifically the encoding of the LP fields in
the HPT and SLB entries, and the fact that they have 32 SLB entries
and support 1TB segments.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parent 3e8f150a
...@@ -107,18 +107,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v) ...@@ -107,18 +107,96 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
hpte[0] = cpu_to_be64(hpte_v); hpte[0] = cpu_to_be64(hpte_v);
} }
/*
* These functions encode knowledge of the POWER7/8/9 hardware
* interpretations of the HPTE LP (large page size) field.
*/
/*
 * Decode the page-size information held in an HPTE.
 *
 * @h: first doubleword of the HPTE (tested for HPTE_V_LARGE)
 * @l: second doubleword of the HPTE (bits 12..19 are examined)
 *
 * Returns:
 *   - the page shift, when base and actual page size are the same
 *     (12, 16, 24 or 34);
 *   - (actual_shift << 8) + base_shift, when the actual page is larger
 *     than the base page;
 *   - -1 when the encoding is not one of the recognised combinations.
 */
static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
{
	unsigned int lp_lo, lp_hi;

	/* Without the large-page bit the entry maps a plain 4kB page. */
	if ((h & HPTE_V_LARGE) == 0)
		return 12;

	lp_lo = (l >> 12) & 0xf;
	lp_hi = (l >> 16) & 0xf;

	if (lp_lo == 1)
		return 16;			/* 64kB */
	if (lp_lo == 7)
		return (16 << 8) + 12;		/* 64kB in 4kB */

	/* The 16MB and 16GB encodings require the upper nibble to be zero. */
	if (lp_lo == 0 && lp_hi == 0)
		return 24;			/* 16MB */
	if (lp_lo == 3 && lp_hi == 0)
		return 34;			/* 16GB */

	if (lp_lo == 8) {
		if (lp_hi == 0)
			return (24 << 8) + 16;	/* 16MB in 64kB */
		if (lp_hi == 3)
			return (24 << 8) + 12;	/* 16MB in 4kB */
	}

	/* Anything else is not a recognised encoding. */
	return -1;
}
/*
 * Return the base page shift of an HPTE, or a negative value if the
 * HPTE's page-size encoding is not recognised.
 *
 * @h: first doubleword of the HPTE
 * @l: second doubleword of the HPTE
 *
 * kvmppc_hpte_page_shifts() keeps the base shift in its low 8 bits and
 * signals an unrecognised encoding with -1.  The error value must be
 * checked before masking: (-1 & 0xff) would otherwise turn into 255,
 * which looks like a (nonsensical) valid shift to the caller.
 */
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
{
	int shifts = kvmppc_hpte_page_shifts(h, l);

	if (shifts < 0)
		return shifts;
	return shifts & 0xff;
}
/*
 * Return the actual page shift of an HPTE.
 *
 * @h: first doubleword of the HPTE
 * @l: second doubleword of the HPTE
 *
 * kvmppc_hpte_page_shifts() yields either a single shift (base and
 * actual size identical, value below 0x100) or a combined value with
 * the actual shift above the low 8 bits.  Values below 0x100, including
 * the negative "unrecognised" result, are passed through unchanged.
 */
static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
{
	int shifts = kvmppc_hpte_page_shifts(h, l);

	return (shifts < 0x100) ? shifts : (shifts >> 8);
}
/*
 * Return the actual page size, in bytes, described by an HPTE, or 0 if
 * the HPTE's page-size encoding is not recognised.
 *
 * @v: first doubleword of the HPTE
 * @r: second doubleword of the HPTE
 *
 * kvmppc_hpte_actual_page_shift() reports an unrecognised encoding as a
 * negative value.  Shifting by a negative count is undefined behaviour
 * in C, so that case is mapped explicitly to 0, which is the value
 * callers test for with "if (!psize)".
 */
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
	int shift = kvmppc_hpte_actual_page_shift(v, r);

	if (shift < 0)
		return 0;
	return 1ul << shift;
}
/*
 * Return the LP-field encoding for a given base/actual page-size pair,
 * or -1 if the pair is not one of the supported combinations.
 *
 * @base_shift:   log2 of the base page size (12, 16 or 24)
 * @actual_shift: log2 of the actual page size (12, 16 or 24)
 *
 * Supported pairs (base, actual) and their encodings:
 *   (12, 12) -> 0     (12, 16) -> 7     (12, 24) -> 0x38
 *   (16, 16) -> 1     (16, 24) -> 8
 *   (24, 24) -> 0
 *
 * A 16MB base page is only accepted together with a 16MB actual page;
 * any other actual size for that base is rejected rather than being
 * silently given the 16MB encoding.
 */
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
{
	switch (base_shift) {
	case 12:
		switch (actual_shift) {
		case 12:
			return 0;
		case 16:
			return 7;
		case 24:
			return 0x38;
		}
		break;
	case 16:
		switch (actual_shift) {
		case 16:
			return 1;
		case 24:
			return 8;
		}
		break;
	case 24:
		if (actual_shift == 24)
			return 0;
		break;
	}
	return -1;
}
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index) unsigned long pte_index)
{ {
int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; int a_pgshift, b_pgshift;
unsigned int penc;
unsigned long rb = 0, va_low, sllp; unsigned long rb = 0, va_low, sllp;
unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
if (v & HPTE_V_LARGE) { b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
i = hpte_page_sizes[lp]; if (a_pgshift >= 0x100) {
b_psize = i & 0xf; b_pgshift &= 0xff;
a_psize = i >> 4; a_pgshift >>= 8;
} }
/* /*
...@@ -152,37 +230,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, ...@@ -152,37 +230,33 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16); va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff; va_low &= 0x7ff;
switch (b_psize) { if (b_pgshift == 12) {
case MMU_PAGE_4K: if (a_pgshift > 12) {
sllp = get_sllp_encoding(a_psize); sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */ rb |= sllp << 5; /* AP field */
}
rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */ rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
break; } else {
default:
{
int aval_shift; int aval_shift;
/* /*
* remaining bits of AVA/LP fields * remaining bits of AVA/LP fields
* Also contain the rr bits of LP * Also contain the rr bits of LP
*/ */
rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; rb |= (va_low << b_pgshift) & 0x7ff000;
/* /*
* Now clear not needed LP bits based on actual psize * Now clear not needed LP bits based on actual psize
*/ */
rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1); rb &= ~((1ul << a_pgshift) - 1);
/* /*
* AVAL field 58..77 - base_page_shift bits of va * AVAL field 58..77 - base_page_shift bits of va
* we have space for 58..64 bits, Missing bits should * we have space for 58..64 bits, Missing bits should
* be zero filled. +1 is to take care of L bit shift * be zero filled. +1 is to take care of L bit shift
*/ */
aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1; aval_shift = 64 - (77 - b_pgshift) + 1;
rb |= ((va_low << aval_shift) & 0xfe); rb |= ((va_low << aval_shift) & 0xfe);
rb |= 1; /* L field */ rb |= 1; /* L field */
penc = mmu_psize_defs[b_psize].penc[a_psize]; rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
rb |= penc << 12; /* LP field */
break;
}
} }
rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
return rb; return rb;
......
...@@ -333,7 +333,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, ...@@ -333,7 +333,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
{ {
unsigned long ra_mask; unsigned long ra_mask;
ra_mask = hpte_page_size(v, r) - 1; ra_mask = kvmppc_actual_pgsz(v, r) - 1;
return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
} }
...@@ -504,7 +504,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -504,7 +504,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
mmio_update = atomic64_read(&kvm->arch.mmio_update); mmio_update = atomic64_read(&kvm->arch.mmio_update);
if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) { if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
r = vcpu->arch.pgfault_cache->rpte; r = vcpu->arch.pgfault_cache->rpte;
psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r); psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1); gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT; gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1)); gpa = gpa_base | (ea & (psize - 1));
...@@ -533,7 +534,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -533,7 +534,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return RESUME_GUEST; return RESUME_GUEST;
/* Translate the logical address and get the page */ /* Translate the logical address and get the page */
psize = hpte_page_size(hpte[0], r); psize = kvmppc_actual_pgsz(hpte[0], r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1); gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT; gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1)); gpa = gpa_base | (ea & (psize - 1));
...@@ -797,7 +798,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, ...@@ -797,7 +798,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
/* Now check and modify the HPTE */ /* Now check and modify the HPTE */
ptel = rev[i].guest_rpte; ptel = rev[i].guest_rpte;
psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) { hpte_rpn(ptel, psize) == gfn) {
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
...@@ -1091,7 +1092,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) ...@@ -1091,7 +1092,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
rev[i].guest_rpte |= HPTE_R_C; rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]); note_hpte_modification(kvm, &rev[i]);
} }
n = hpte_page_size(v, r); n = kvmppc_actual_pgsz(v, r);
n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (n > npages_dirty) if (n > npages_dirty)
npages_dirty = n; npages_dirty = n;
...@@ -1266,7 +1267,7 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize, ...@@ -1266,7 +1267,7 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
guest_rpte = rev->guest_rpte; guest_rpte = rev->guest_rpte;
ret = -EIO; ret = -EIO;
apsize = hpte_page_size(vpte, guest_rpte); apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
if (!apsize) if (!apsize)
goto out; goto out;
......
...@@ -3300,22 +3300,21 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) ...@@ -3300,22 +3300,21 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
} }
static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
int linux_psize) int shift, int sllp)
{ {
struct mmu_psize_def *def = &mmu_psize_defs[linux_psize]; (*sps)->page_shift = shift;
(*sps)->slb_enc = sllp;
if (!def->shift) (*sps)->enc[0].page_shift = shift;
return; (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
(*sps)->page_shift = def->shift;
(*sps)->slb_enc = def->sllp;
(*sps)->enc[0].page_shift = def->shift;
(*sps)->enc[0].pte_enc = def->penc[linux_psize];
/* /*
* Add 16MB MPSS support if host supports it * Add 16MB MPSS support (may get filtered out by userspace)
*/ */
if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) { if (shift != 24) {
int penc = kvmppc_pgsize_lp_encoding(shift, 24);
if (penc != -1) {
(*sps)->enc[1].page_shift = 24; (*sps)->enc[1].page_shift = 24;
(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M]; (*sps)->enc[1].pte_enc = penc;
}
} }
(*sps)++; (*sps)++;
} }
...@@ -3340,16 +3339,15 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, ...@@ -3340,16 +3339,15 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
info->data_keys = 32; info->data_keys = 32;
info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0; info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
info->flags = KVM_PPC_PAGE_SIZES_REAL; /* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
info->flags |= KVM_PPC_1T_SEGMENTS; info->slb_size = 32;
info->slb_size = mmu_slb_size;
/* We only support these sizes for now, and no muti-size segments */ /* We only support these sizes for now, and no muti-size segments */
sps = &info->sps[0]; sps = &info->sps[0];
kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K); kvmppc_add_seg_page_size(&sps, 12, 0);
kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K); kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M); kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
return 0; return 0;
} }
...@@ -4352,4 +4350,3 @@ module_exit(kvmppc_book3s_exit_hv); ...@@ -4352,4 +4350,3 @@ module_exit(kvmppc_book3s_exit_hv);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR); MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm"); MODULE_ALIAS("devname:kvm");
...@@ -129,7 +129,7 @@ static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, ...@@ -129,7 +129,7 @@ static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
unsigned long *rmap; unsigned long *rmap;
unsigned long gfn; unsigned long gfn;
gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (!memslot) if (!memslot)
return NULL; return NULL;
...@@ -169,7 +169,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, ...@@ -169,7 +169,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
} }
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
if (rcbits & HPTE_R_C) if (rcbits & HPTE_R_C)
kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r)); kvmppc_update_rmap_change(rmap,
kvmppc_actual_pgsz(hpte_v, hpte_r));
unlock_rmap(rmap); unlock_rmap(rmap);
} }
...@@ -193,7 +194,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, ...@@ -193,7 +194,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
if (kvm_is_radix(kvm)) if (kvm_is_radix(kvm))
return H_FUNCTION; return H_FUNCTION;
psize = hpte_page_size(pteh, ptel); psize = kvmppc_actual_pgsz(pteh, ptel);
if (!psize) if (!psize)
return H_PARAMETER; return H_PARAMETER;
writing = hpte_is_writable(ptel); writing = hpte_is_writable(ptel);
...@@ -848,7 +849,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -848,7 +849,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
r = be64_to_cpu(hpte[1]); r = be64_to_cpu(hpte[1]);
gr |= r & (HPTE_R_R | HPTE_R_C); gr |= r & (HPTE_R_R | HPTE_R_C);
if (r & HPTE_R_C) { if (r & HPTE_R_C) {
unsigned long psize = hpte_page_size(v, r); unsigned long psize = kvmppc_actual_pgsz(v, r);
hpte[1] = cpu_to_be64(r & ~HPTE_R_C); hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
eieio(); eieio();
rmap = revmap_for_hpte(kvm, v, gr); rmap = revmap_for_hpte(kvm, v, gr);
...@@ -1014,7 +1015,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, ...@@ -1014,7 +1015,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
* Check the HPTE again, including base page size * Check the HPTE again, including base page size
*/ */
if ((v & valid) && (v & mask) == val && if ((v & valid) && (v & mask) == val &&
hpte_base_page_size(v, r) == (1ul << pshift)) kvmppc_hpte_base_page_shift(v, r) == pshift)
/* Return with the HPTE still locked */ /* Return with the HPTE still locked */
return (hash << 3) + (i >> 1); return (hash << 3) + (i >> 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment