Commit 97d64b78 authored by Avi Kivity

KVM: MMU: Optimize pte permission checks

walk_addr_generic() performs its permission checks in a maze of branchy code that runs four times per lookup.  The result depends on the type of access, efer.nxe, cr0.wp, cr4.smep, and, in the near future, cr4.smap.

Optimize this away by precalculating all variants and storing them in a
bitmap.  The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).

The permission check is moved to the end of the loop; otherwise an SMEP
fault could be reported as a false positive when PDE.U=1 but PTE.U=0.
Noted by Xiao Guangrong.

The result is short, branch-free code.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent 8cbc7069
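
[Editor's note] To make the indexing concrete before reading the diff (a worked example added here, not part of the commit): the byte index is the page fault error code shifted right by one (bits [4:1], i.e. W, U, RSVD, fetch), and the bit index is the gpte's permissions in ACC_* form. The constant values below mirror the kernel's PFERR_ and ACC_ definitions for x86.

#include <stdio.h>

/* Page-fault error-code bits (W = bit 1, U = bit 2, fetch = bit 4) and
 * gpte permission bits in ACC_* form (exec = 1, write = 2, user = 4). */
#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_FETCH_MASK (1u << 4)
#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

int main(void)
{
	unsigned pfec = PFERR_USER_MASK | PFERR_WRITE_MASK; /* write fault from user mode */
	unsigned acc  = ACC_USER_MASK | ACC_WRITE_MASK;     /* gpte grants user + write   */

	/* The check becomes a single table probe: permissions[3], bit 6 here. */
	printf("byte index = %u, bit index = %u\n", pfec >> 1, acc);
	return 0;
}

In this case the looked-up bit is clear (no fault), since a user write to a user-writable gpte is allowed regardless of cr0.wp and cr4.smep.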
arch/x86/include/asm/kvm_host.h:

@@ -287,6 +287,13 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;
 
+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
arch/x86/kvm/mmu.c:

@@ -3516,6 +3516,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3524,6 +3556,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;

@@ -3552,6 +3585,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;

@@ -3612,6 +3646,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
+	update_permission_bitmask(vcpu, context);
+
 	return 0;
 }

@@ -3687,6 +3723,8 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
+	update_permission_bitmask(vcpu, g_context);
+
 	return 0;
 }
arch/x86/kvm/mmu.h:

@@ -89,17 +89,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-		     && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
 
 #endif
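
[Editor's note] The whole scheme can be replayed end to end in a minimal, self-contained userspace sketch (an illustration, not kernel code: struct mmu_model, update_permissions() and the plain nx/wp/smep parameters stand in for the vcpu state that update_permission_bitmask() reads). It precomputes the same 16-byte table and then answers each query with the same single load, shift and mask as permission_fault() above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Error-code and gpte permission bits, mirroring the kernel's PFERR_ and ACC_ masks. */
#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_FETCH_MASK (1u << 4)
#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

struct mmu_model {
	uint8_t permissions[16];   /* bit set = permission fault */
};

/* Userspace analogue of update_permission_bitmask(): enumerate every
 * (error code, gpte access) pair once and record whether it faults. */
static void update_permissions(struct mmu_model *mmu, bool nx, bool wp, bool smep)
{
	for (unsigned byte = 0; byte < 16; ++byte) {
		unsigned pfec = byte << 1;
		bool wf = pfec & PFERR_WRITE_MASK;
		bool uf = pfec & PFERR_USER_MASK;
		bool ff = pfec & PFERR_FETCH_MASK;
		uint8_t map = 0;

		for (unsigned bit = 0; bit < 8; ++bit) {
			bool x = bit & ACC_EXEC_MASK;
			bool w = bit & ACC_WRITE_MASK;
			bool u = bit & ACC_USER_MASK;

			x |= !nx;                 /* without nx, everything is executable  */
			w |= !wp && !uf;          /* !cr0.wp lets the kernel write r/o pages */
			x &= !(smep && u && !uf); /* smep: no kernel fetch from user pages   */

			bool fault = (ff && !x) || (uf && !u) || (wf && !w);
			map |= (uint8_t)fault << bit;
		}
		mmu->permissions[byte] = map;
	}
}

/* The branch-free check: one byte load, one shift, one mask. */
static bool permission_fault(const struct mmu_model *mmu,
			     unsigned pte_access, unsigned pfec)
{
	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
}

int main(void)
{
	struct mmu_model mmu;

	update_permissions(&mmu, true /* nx */, true /* cr0.wp */, true /* smep */);

	/* user write to a user+write page: allowed (prints 0) */
	printf("%d\n", permission_fault(&mmu, ACC_USER_MASK | ACC_WRITE_MASK,
					PFERR_USER_MASK | PFERR_WRITE_MASK));
	/* kernel instruction fetch from a user-executable page: SMEP fault (prints 1) */
	printf("%d\n", permission_fault(&mmu, ACC_USER_MASK | ACC_EXEC_MASK,
					PFERR_FETCH_MASK));
	return 0;
}

Compiled as C99, the two queries print 0 and 1 respectively, which is the short, branch-free behaviour the commit message describes.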
arch/x86/kvm/paging_tmpl.h:

@@ -169,7 +169,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
 	bool eperm, last_gpte;
 	int offset;

@@ -237,24 +237,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		}
 
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					     pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
+		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access & gpte_access(vcpu, pte);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
 
 		walker->ptes[walker->level - 1] = pte;

@@ -284,10 +269,11 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			break;
 		}
 
-		pt_access &= gpte_access(vcpu, pte);
+		pt_access &= pte_access;
 		--walker->level;
 	}
 
+	eperm |= permission_fault(mmu, pte_access, access);
+
 	if (unlikely(eperm)) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
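
[Editor's note] Why the check had to move after the loop (the PDE.U=1, PTE.U=0 case noted in the commit message) can be replayed with the userspace model above. The lines below are meant as an extension of that sketch's main(), with smep enabled, not standalone code.

	/* A supervisor-only page reached through a PDE whose U bit is 1. */
	unsigned pde_acc = ACC_USER_MASK | ACC_WRITE_MASK | ACC_EXEC_MASK; /* PDE.U=1 */
	unsigned pte_acc = ACC_WRITE_MASK | ACC_EXEC_MASK;                 /* PTE.U=0 */
	unsigned eff_acc = pde_acc & pte_acc;  /* what pt_access/pte_access accumulate */

	/* Supervisor instruction fetch: checking the PDE alone would misreport
	 * an SMEP fault (prints 1); checking the accumulated access after the
	 * loop gives the correct answer (prints 0). */
	printf("%d\n", permission_fault(&mmu, pde_acc, PFERR_FETCH_MASK));
	printf("%d\n", permission_fault(&mmu, eff_acc, PFERR_FETCH_MASK));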
arch/x86/kvm/x86.c:

@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 			      gpa_t *gpa, struct x86_exception *exception,
 			      bool write)
 {
-	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+		| (write ? PFERR_WRITE_MASK : 0);
 
-	if (vcpu_match_mmio_gva(vcpu, gva) &&
-	    check_write_user_access(vcpu, write, access,
-	    vcpu->arch.access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva)
+	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 			(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
 		return 1;
 	}
 
-	if (write)
-		access |= PFERR_WRITE_MASK;
-
 	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 
 	if (*gpa == UNMAPPED_GVA)