Commit 921d2597 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "s390:
   - implement diag318

  x86:
   - Report last CPU for debugging
   - Emulate smaller MAXPHYADDR in the guest than in the host
   - .noinstr and tracing fixes from Thomas
   - nested SVM page table switching optimization and fixes

  Generic:
   - Unify shadow MMU cache data structures across architectures"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (127 commits)
  KVM: SVM: Fix sev_pin_memory() error handling
  KVM: LAPIC: Set the TDCR settable bits
  KVM: x86: Specify max TDP level via kvm_configure_mmu()
  KVM: x86/mmu: Rename max_page_level to max_huge_page_level
  KVM: x86: Dynamically calculate TDP level from max level and MAXPHYADDR
  KVM: VXM: Remove temporary WARN on expected vs. actual EPTP level mismatch
  KVM: x86: Pull the PGD's level from the MMU instead of recalculating it
  KVM: VMX: Make vmx_load_mmu_pgd() static
  KVM: x86/mmu: Add separate helper for shadow NPT root page role calc
  KVM: VMX: Drop a duplicate declaration of construct_eptp()
  KVM: nSVM: Correctly set the shadow NPT root level in its MMU role
  KVM: Using macros instead of magic values
  MIPS: KVM: Fix build error caused by 'kvm_run' cleanup
  KVM: nSVM: remove nonsensical EXITINFO1 adjustment on nested NPF
  KVM: x86: Add a capability for GUEST_MAXPHYADDR < HOST_MAXPHYADDR support
  KVM: VMX: optimize #PF injection when MAXPHYADDR does not match
  KVM: VMX: Add guest physical address check in EPT violation and misconfig
  KVM: VMX: introduce vmx_need_pf_intercept
  KVM: x86: update exception bitmap on CPUID changes
  KVM: x86: rename update_bp_intercept to update_exception_bitmap
  ...
parents 7b4ea945 f3633c26
......@@ -5804,8 +5804,9 @@
panic() code such as dumping handler.
xen_nopvspin [X86,XEN]
Disables the ticketlock slowpath using Xen PV
optimizations.
Disables the qspinlock slowpath using Xen PV optimizations.
This parameter is obsoleted by "nopvspin" parameter, which
has equivalent effect for XEN platform.
xen_nopv [X86]
Disables the PV optimizations forcing the HVM guest to
......@@ -5831,6 +5832,11 @@
as generic guest with no PV drivers. Currently support
XEN HVM, KVM, HYPER_V and VMWARE guest.
nopvspin [X86,XEN,KVM]
Disables the qspinlock slow path using PV optimizations
which allow the hypervisor to 'idle' the guest on lock
contention.
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
......
......@@ -669,6 +669,10 @@ MSRs that have been set successfully.
Defines the vcpu responses to the cpuid instruction. Applications
should use the KVM_SET_CPUID2 ioctl if available.
Note, when this IOCTL fails, KVM gives no guarantees that previous valid CPUID
configuration (if there is) is not corrupted. Userspace can get a copy of the
resulting CPUID configuration through KVM_GET_CPUID2 in case.
::
struct kvm_cpuid_entry {
......@@ -4795,6 +4799,7 @@ hardware_exit_reason.
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
__u32 cpu; /* if KVM_LAST_CPU */
} fail_entry;
If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
......
......@@ -27,12 +27,12 @@ struct kvm_sys_reg_target_table {
void kvm_register_target_sys_reg_table(unsigned int target,
struct kvm_sys_reg_target_table *table);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
#define kvm_coproc_table_init kvm_sys_reg_table_init
void kvm_sys_reg_table_init(void);
......
......@@ -97,17 +97,6 @@ struct kvm_arch {
bool return_nisv_io_abort_to_user;
};
#define KVM_NR_MEM_OBJS 40
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
struct kvm_vcpu_fault_info {
u32 esr_el2; /* Hyp Syndrom Register */
u64 far_el2; /* Hyp Fault Address Register */
......@@ -486,18 +475,15 @@ u64 __kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa);
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
int kvm_perf_init(void);
int kvm_perf_teardown(void);
......
......@@ -139,7 +139,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_KVM_TYPES_H
#define _ASM_ARM64_KVM_TYPES_H
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_ARM64_KVM_TYPES_H */
......@@ -270,6 +270,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
......@@ -658,7 +660,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, run);
ret = kvm_handle_mmio_return(vcpu);
if (ret)
return ret;
}
......@@ -810,11 +812,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, run, ret);
handle_exit_early(vcpu, ret);
preempt_enable();
ret = handle_exit(vcpu, run, ret);
ret = handle_exit(vcpu, ret);
}
/* Tell userspace about in-kernel device output levels */
......
......@@ -25,7 +25,7 @@
#define CREATE_TRACE_POINTS
#include "trace_handle_exit.h"
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
{
......@@ -33,7 +33,7 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
kvm_inject_vabt(vcpu);
}
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_hvc(struct kvm_vcpu *vcpu)
{
int ret;
......@@ -50,7 +50,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
* "If an SMC instruction executed at Non-secure EL1 is
......@@ -69,7 +69,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Guest access to FP/ASIMD registers are routed to this handler only
* when the system doesn't support FP/ASIMD.
*/
static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
......@@ -87,7 +87,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
* world-switches and schedule other host processes until there is an
* incoming IRQ or FIQ to the VM.
*/
static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
......@@ -109,16 +109,16 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
* kvm_handle_guest_debug - handle a debug exception instruction
*
* @vcpu: the vcpu pointer
* @run: access to the kvm_run structure for results
*
* We route all debug exceptions through the same handler. If both the
* guest and host are using the same debug facilities it will be up to
* userspace to re-inject the correct exception for guest delivery.
*
* @return: 0 (while setting run->exit_reason), -1 for error
* @return: 0 (while setting vcpu->run->exit_reason), -1 for error
*/
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
int ret = 0;
......@@ -144,7 +144,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
{
u32 hsr = kvm_vcpu_get_hsr(vcpu);
......@@ -155,7 +155,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_sve(struct kvm_vcpu *vcpu)
{
/* Until SVE is supported for guests: */
kvm_inject_undefined(vcpu);
......@@ -167,7 +167,7 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
* a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
* that we can do is give the guest an UNDEF.
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
......@@ -212,7 +212,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
* KVM_EXIT_DEBUG, otherwise userspace needs to complete its
* emulation first.
*/
static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
......@@ -227,7 +227,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
exit_handle_fn exit_handler;
exit_handler = kvm_get_exit_handler(vcpu);
handled = exit_handler(vcpu, run);
handled = exit_handler(vcpu);
}
return handled;
......@@ -237,9 +237,10 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
*/
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
if (ARM_SERROR_PENDING(exception_index)) {
u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
......@@ -265,7 +266,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case ARM_EXCEPTION_EL1_SERROR:
return 1;
case ARM_EXCEPTION_TRAP:
return handle_trap_exceptions(vcpu, run);
return handle_trap_exceptions(vcpu);
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
......@@ -289,8 +290,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
......
......@@ -77,9 +77,8 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
* or in-kernel IO emulation
*
* @vcpu: The VCPU pointer
* @run: The VCPU run struct containing the mmio data
*/
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
{
unsigned long data;
unsigned int len;
......@@ -92,6 +91,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mmio_needed = 0;
if (!kvm_vcpu_dabt_iswrite(vcpu)) {
struct kvm_run *run = vcpu->run;
len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
......@@ -119,9 +120,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
struct kvm_run *run = vcpu->run;
unsigned long data;
unsigned long rt;
int ret;
......@@ -188,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (!is_write)
memcpy(run->mmio.data, data_buf, len);
vcpu->stat.mmio_exit_kernel++;
kvm_handle_mmio_return(vcpu, run);
kvm_handle_mmio_return(vcpu);
return 1;
}
......
......@@ -124,38 +124,6 @@ static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
put_page(virt_to_page(pudp));
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
int min, int max)
{
void *page;
BUG_ON(max > KVM_NR_MEM_OBJS);
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < max) {
page = (void *)__get_free_page(GFP_PGTABLE_USER);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
}
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
while (mc->nobjs)
free_page((unsigned long)mc->objects[--mc->nobjs]);
}
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
BUG_ON(!mc || !mc->nobjs);
p = mc->objects[--mc->nobjs];
return p;
}
static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
......@@ -1132,7 +1100,7 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_pgd_none(kvm, *pgd)) {
if (!cache)
return NULL;
p4d = mmu_memory_cache_alloc(cache);
p4d = kvm_mmu_memory_cache_alloc(cache);
stage2_pgd_populate(kvm, pgd, p4d);
get_page(virt_to_page(pgd));
}
......@@ -1150,7 +1118,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_p4d_none(kvm, *p4d)) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
pud = kvm_mmu_memory_cache_alloc(cache);
stage2_p4d_populate(kvm, p4d, pud);
get_page(virt_to_page(p4d));
}
......@@ -1171,7 +1139,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return NULL;
pmd = mmu_memory_cache_alloc(cache);
pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
......@@ -1377,7 +1345,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
if (stage2_pud_none(kvm, *pud)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pmd = mmu_memory_cache_alloc(cache);
pmd = kvm_mmu_memory_cache_alloc(cache);
stage2_pud_populate(kvm, pud, pmd);
get_page(virt_to_page(pud));
}
......@@ -1402,7 +1370,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
pte = mmu_memory_cache_alloc(cache);
pte = kvm_mmu_memory_cache_alloc(cache);
kvm_pmd_populate(pmd, pte);
get_page(virt_to_page(pmd));
}
......@@ -1469,7 +1437,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t addr, end;
int ret = 0;
unsigned long pfn;
struct kvm_mmu_memory_cache cache = { 0, };
struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(pa);
......@@ -1480,9 +1448,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (writable)
pte = kvm_s2pte_mkwrite(pte);
ret = mmu_topup_memory_cache(&cache,
kvm_mmu_cache_min_pages(kvm),
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(&cache,
kvm_mmu_cache_min_pages(kvm));
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
......@@ -1496,7 +1463,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
}
out:
mmu_free_memory_cache(&cache);
kvm_mmu_free_memory_cache(&cache);
return ret;
}
......@@ -1882,8 +1849,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mmap_read_unlock(current->mm);
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm));
if (ret)
return ret;
......@@ -2050,7 +2016,6 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
* @run: the kvm_run structure
*
* Any abort that gets to the host is almost guaranteed to be caused by a
* missing second stage translation table entry, which can mean that either the
......@@ -2059,7 +2024,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* space. The distinction is based on the IPA causing the fault and whether this
* memory region has been registered as standard RAM by user space.
*/
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
unsigned long fault_status;
phys_addr_t fault_ipa;
......@@ -2138,7 +2103,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
* of the page size.
*/
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
ret = io_mem_abort(vcpu, run, fault_ipa);
ret = io_mem_abort(vcpu, fault_ipa);
goto out_unlock;
}
......@@ -2307,7 +2272,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
phys_addr_t kvm_mmu_get_httbr(void)
......
......@@ -2156,7 +2156,7 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
}
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
{
kvm_inject_undefined(vcpu);
return 1;
......@@ -2335,7 +2335,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
return 1;
}
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
{
const struct sys_reg_desc *target_specific;
size_t num;
......@@ -2346,7 +2346,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
target_specific, num);
}
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
{
const struct sys_reg_desc *target_specific;
size_t num;
......@@ -2357,14 +2357,14 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
target_specific, num);
}
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
{
return kvm_handle_cp_64(vcpu,
cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
NULL, 0);
}
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
{
return kvm_handle_cp_32(vcpu,
cp14_regs, ARRAY_SIZE(cp14_regs),
......@@ -2416,9 +2416,8 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
/**
* kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
{
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);
......
......@@ -2182,6 +2182,7 @@ endchoice
config KVM_GUEST
bool "KVM Guest Kernel"
depends on CPU_MIPS32_R2
depends on BROKEN_ON_SMP
help
Select this option if building a guest kernel for KVM (Trap & Emulate)
......
......@@ -335,17 +335,6 @@ struct kvm_mips_tlb {
long tlb_lo[2];
};
#define KVM_NR_MEM_OBJS 4
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
#define KVM_MIPS_AUX_FPU 0x1
#define KVM_MIPS_AUX_MSA 0x2
......@@ -854,8 +843,8 @@ struct kvm_mips_callbacks {
const struct kvm_one_reg *reg, s64 v);
int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu);
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu);
int (*vcpu_run)(struct kvm_vcpu *vcpu);
void (*vcpu_reenter)(struct kvm_vcpu *vcpu);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
......@@ -910,7 +899,6 @@ extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu,
bool write_fault);
......@@ -1021,83 +1009,67 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu)
extern enum emulation_result kvm_mips_emulate_inst(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_syscall(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_tlbmod(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_handle_ri(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_ri_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_bp_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_trap_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run);
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu);
u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
......@@ -1126,26 +1098,21 @@ static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {}
enum emulation_result kvm_mips_check_privilege(u32 cause,
u32 *opc,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
u32 *opc,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
u32 *opc,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
/* COP0 */
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_MIPS_KVM_TYPES_H
#define _ASM_MIPS_KVM_TYPES_H
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 4
#endif /* _ASM_MIPS_KVM_TYPES_H */
KVM/MIPS Trap & Emulate Release Notes
=====================================
(1) KVM/MIPS should support MIPS32R2 and beyond. It has been tested on the following platforms:
Malta Board with FPGA based 34K
Sigma Designs TangoX board with a 24K based 8654 SoC.
Malta Board with 74K @ 1GHz
(2) Both Guest kernel and Guest Userspace execute in UM.
Guest User address space: 0x00000000 -> 0x40000000
Guest Kernel Unmapped: 0x40000000 -> 0x60000000
Guest Kernel Mapped: 0x60000000 -> 0x80000000
Guest Usermode virtual memory is limited to 1GB.
(2) 16K Page Sizes: Both Host Kernel and Guest Kernel should have the same page size, currently at least 16K.
Note that due to cache aliasing issues, 4K page sizes are NOT supported.
(3) No HugeTLB Support
Both the host kernel and Guest kernel should have the page size set to 16K.
This will be implemented in a future release.
(4) KVM/MIPS does not have support for SMP Guests
Linux-3.7-rc2 based SMP guest hangs due to the following code sequence in the generated TLB handlers:
LL/TLBP/SC. Since the TLBP instruction causes a trap the reservation gets cleared
when we ERET back to the guest. This causes the guest to hang in an infinite loop.
This will be fixed in a future release.
(5) Use Host FPU
Currently KVM/MIPS emulates a 24K CPU without a FPU.
This will be fixed in a future release
......@@ -37,10 +37,11 @@ choice
config KVM_MIPS_TE
bool "Trap & Emulate"
depends on CPU_MIPS32_R2
help
Use trap and emulate to virtualize 32-bit guests in user mode. This
does not require any special hardware Virtualization support beyond
standard MIPS32/64 r2 or later, but it does require the guest kernel
standard MIPS32 r2 or later, but it does require the guest kernel
to be configured with CONFIG_KVM_GUEST=y so that it resides in the
user address segment.
......
This diff is collapsed.
......@@ -450,7 +450,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
int r = -EINTR;
vcpu_load(vcpu);
......@@ -459,11 +458,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
kvm_mips_complete_mmio_load(vcpu, run);
kvm_mips_complete_mmio_load(vcpu);
vcpu->mmio_needed = 0;
}
if (run->immediate_exit)
if (vcpu->run->immediate_exit)
goto out;
lose_fpu(1);
......@@ -480,7 +479,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
r = kvm_mips_callbacks->vcpu_run(run, vcpu);
r = kvm_mips_callbacks->vcpu_run(vcpu);
trace_kvm_out(vcpu);
guest_exit_irqoff();
......@@ -1236,7 +1235,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
* end up causing an exception to be delivered to the Guest
* Kernel
*/
er = kvm_mips_check_privilege(cause, opc, run, vcpu);
er = kvm_mips_check_privilege(cause, opc, vcpu);
if (er == EMULATE_PRIV_FAIL) {
goto skip_emul;
} else if (er == EMULATE_FAIL) {
......@@ -1385,7 +1384,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
kvm_mips_callbacks->vcpu_reenter(run, vcpu);
kvm_mips_callbacks->vcpu_reenter(vcpu);
/*
* If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
......
......@@ -25,41 +25,9 @@
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
int min, int max)
{
void *page;
BUG_ON(max > KVM_NR_MEM_OBJS);
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < max) {
page = (void *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
}
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
while (mc->nobjs)
free_page((unsigned long)mc->objects[--mc->nobjs]);
}
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
BUG_ON(!mc || !mc->nobjs);
p = mc->objects[--mc->nobjs];
return p;
}
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
/**
......@@ -153,7 +121,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
if (!cache)
return NULL;
new_pmd = mmu_memory_cache_alloc(cache);
new_pmd = kvm_mmu_memory_cache_alloc(cache);
pmd_init((unsigned long)new_pmd,
(unsigned long)invalid_pte_table);
pud_populate(NULL, pud, new_pmd);
......@@ -164,7 +132,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
if (!cache)
return NULL;
new_pte = mmu_memory_cache_alloc(cache);
new_pte = kvm_mmu_memory_cache_alloc(cache);
clear_page(new_pte);
pmd_populate_kernel(NULL, pmd, new_pte);
}
......@@ -711,8 +679,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
goto out;
/* We need a minimum of cached pages ready for page table creation */
err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (err)
goto out;
......@@ -796,8 +763,7 @@ static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
int ret;
/* We need a minimum of cached pages ready for page table creation */
ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
KVM_NR_MEM_OBJS);
ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
if (ret)
return NULL;
......
This diff is collapsed.
......@@ -874,7 +874,6 @@ static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
......@@ -1074,7 +1073,6 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
u32 *opc, u32 cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
enum emulation_result er = EMULATE_DONE;
......@@ -1217,7 +1215,6 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
{
enum emulation_result er = EMULATE_DONE;
struct kvm_vcpu_arch *arch = &vcpu->arch;
struct kvm_run *run = vcpu->run;
union mips_instruction inst;
int rd, rt, sel;
int err;
......@@ -1233,12 +1230,12 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
switch (inst.r_format.opcode) {
case cop0_op:
er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cop0(inst, opc, cause, vcpu);
break;
#ifndef CONFIG_CPU_MIPSR6
case cache_op:
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu);
break;
#endif
#ifdef CONFIG_CPU_LOONGSON64
......@@ -1251,7 +1248,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
#ifdef CONFIG_CPU_MIPSR6
case cache6_op:
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
er = kvm_vz_gpsi_cache(inst, opc, cause, vcpu);
break;
#endif
case rdhwr_op:
......@@ -1553,7 +1550,6 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
*/
static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_FAIL;
int ret = RESUME_GUEST;
......@@ -1581,7 +1577,7 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
break;
case EMULATE_FAIL:
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
break;
......@@ -1600,8 +1596,6 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
*/
static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/*
* If MSA not present or not exposed to guest or FR=0, the MSA operation
* should have been treated as a reserved instruction!
......@@ -1612,7 +1606,7 @@ static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
(read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 ||
!(read_gc0_config5() & MIPS_CONF5_MSAEN) ||
vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return RESUME_HOST;
}
......@@ -1648,7 +1642,7 @@ static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
}
/* Treat as MMIO */
er = kvm_mips_emulate_load(inst, cause, run, vcpu);
er = kvm_mips_emulate_load(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n",
opc, badvaddr);
......@@ -1695,7 +1689,7 @@ static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
}
/* Treat as MMIO */
er = kvm_mips_emulate_store(inst, cause, run, vcpu);
er = kvm_mips_emulate_store(inst, cause, vcpu);
if (er == EMULATE_FAIL) {
kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n",
opc, badvaddr);
......@@ -3242,7 +3236,7 @@ static void kvm_vz_flush_shadow_memslot(struct kvm *kvm,
kvm_vz_flush_shadow_all(kvm);
}
static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
static void kvm_vz_vcpu_reenter(struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int preserve_guest_tlb;
......@@ -3258,7 +3252,7 @@ static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvm_vz_vcpu_load_wired(vcpu);
}
static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
static int kvm_vz_vcpu_run(struct kvm_vcpu *vcpu)
{
int cpu = smp_processor_id();
int r;
......@@ -3271,7 +3265,7 @@ static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvm_vz_vcpu_load_tlb(vcpu, cpu);
kvm_vz_vcpu_load_wired(vcpu);
r = vcpu->arch.vcpu_run(run, vcpu);
r = vcpu->arch.vcpu_run(vcpu->run, vcpu);
kvm_vz_vcpu_save_wired(vcpu);
......
......@@ -4,6 +4,7 @@ generated-y += syscall_table_64.h
generated-y += syscall_table_c32.h
generated-y += syscall_table_spu.h
generic-y += export.h
generic-y += kvm_types.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += vtime.h
......
......@@ -6,5 +6,6 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += export.h
generic-y += kvm_types.h
generic-y += local64.h
generic-y += mcs_spinlock.h
......@@ -298,10 +298,8 @@ struct diag26c_mac_resp {
union diag318_info {
unsigned long val;
struct {
unsigned int cpnc : 8;
unsigned int cpvc_linux : 24;
unsigned char cpvc_distro[3];
unsigned char zero;
unsigned long cpnc : 8;
unsigned long cpvc : 56;
};
};
......
......@@ -260,7 +260,8 @@ struct kvm_s390_sie_block {
__u32 scaol; /* 0x0064 */
__u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u8 cpnc; /* 0x006a */
__u8 reserved6b; /* 0x006b */
__u32 todpr; /* 0x006c */
#define GISA_FORMAT1 0x00000001
__u32 gd; /* 0x0070 */
......@@ -745,6 +746,7 @@ struct kvm_vcpu_arch {
bool gs_enabled;
bool skey_enabled;
struct kvm_s390_pv_vcpu pv;
union diag318_info diag318_info;
};
struct kvm_vm_stat {
......
......@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
......@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
__u8 padding2[192]; /* sdnx needs to be 256byte aligned */
__u64 diag318; /* diagnose 0x318 info */
__u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
......
......@@ -1021,8 +1021,7 @@ static void __init setup_control_program_code(void)
{
union diag318_info diag318_info = {
.cpnc = CPNC_LINUX,
.cpvc_linux = 0,
.cpvc_distro = {0},
.cpvc = 0,
};
if (!sclp.has_diag318)
......
......@@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
......@@ -3267,7 +3268,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
KVM_SYNC_ACRS |
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
KVM_SYNC_PFAULT |
KVM_SYNC_DIAG318;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
......@@ -3562,6 +3564,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->todpr = 0;
vcpu->arch.sie_block->cpnc = 0;
}
}
......@@ -3579,6 +3582,7 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
regs->etoken = 0;
regs->etoken_extension = 0;
regs->diag318 = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
......@@ -3954,33 +3958,31 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
return 0;
return false;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
return 0;
return false;
if (psw_extint_disabled(vcpu))
return 0;
return false;
if (kvm_s390_vcpu_has_irq(vcpu, 0))
return 0;
return false;
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
return 0;
return false;
if (!vcpu->arch.gmap->pfault_enabled)
return 0;
return false;
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return 0;
return false;
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
return rc;
return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
......@@ -4175,8 +4177,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
......@@ -4196,6 +4199,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
}
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
* we should enable RI here instead of doing the lazy enablement.
......@@ -4242,8 +4249,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void sync_regs(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
......@@ -4272,7 +4281,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
sync_regs_fmt2(vcpu, kvm_run);
sync_regs_fmt2(vcpu);
} else {
/*
* In several places we have to modify our internal view to
......@@ -4291,12 +4300,15 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->kvm_dirty_regs = 0;
}
static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
if (MACHINE_HAS_GS) {
__ctl_set_bit(2, 4);
if (vcpu->arch.gs_enabled)
......@@ -4312,8 +4324,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static void store_regs(struct kvm_vcpu *vcpu)
{
struct kvm_run *kvm_run = vcpu->run;
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
......@@ -4332,7 +4346,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
store_regs_fmt2(vcpu, kvm_run);
store_regs_fmt2(vcpu);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
......@@ -4370,7 +4384,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
goto out;
}
sync_regs(vcpu, kvm_run);
sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
might_fault();
......@@ -4392,7 +4406,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
store_regs(vcpu);
kvm_sigset_deactivate(vcpu);
......
......@@ -548,6 +548,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
scb_s->hpid = HPID_VSIE;
scb_s->cpnc = scb_o->cpnc;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
......
......@@ -803,6 +803,7 @@ config KVM_GUEST
depends on PARAVIRT
select PARAVIRT_CLOCK
select ARCH_CPUIDLE_HALTPOLL
select X86_HV_CALLBACK_VECTOR
default y
help
This option enables various optimizations for running under the KVM
......
......@@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flush_l1d(void)
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
}
static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
{
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
......
......@@ -632,6 +632,10 @@ DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback);
#endif
#ifdef CONFIG_KVM_GUEST
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_kvm_asyncpf_interrupt);
#endif
#undef X86_TRAP_OTHER
#endif
......@@ -193,8 +193,6 @@ struct x86_exception;
enum x86_intercept;
enum x86_intercept_stage;
#define KVM_NR_MEM_OBJS 40
#define KVM_NR_DB_REGS 4
#define DR6_BD (1 << 13)
......@@ -245,15 +243,6 @@ enum x86_intercept_stage;
struct kvm_kernel_irq_routing_entry;
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
/*
* the pages used as guest page table on soft mmu are tracked by
* kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
......@@ -322,43 +311,6 @@ struct kvm_rmap_head {
unsigned long val;
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
struct list_head lpage_disallowed_link;
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
union kvm_mmu_page_role role;
gfn_t gfn;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
/*
* Used out of the mmu-lock to avoid reading spte values while an
* update is in progress; see the comments in __get_spte_lockless().
*/
int clear_spte_count;
#endif
/* Number of writes since the last time traversal visited this page. */
atomic_t write_flooding_count;
};
struct kvm_pio_request {
unsigned long linear_rip;
unsigned long count;
......@@ -384,6 +336,8 @@ struct kvm_mmu_root_info {
#define KVM_MMU_NUM_PREV_ROOTS 3
struct kvm_mmu_page;
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
......@@ -580,6 +534,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr4_guest_rsvd_bits;
unsigned long cr8;
u32 host_pkru;
u32 pkru;
......@@ -635,7 +590,8 @@ struct kvm_vcpu_arch {
struct kvm_mmu *walk_mmu;
struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_gfn_array_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
/*
......@@ -683,7 +639,7 @@ struct kvm_vcpu_arch {
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
int maxphyaddr;
int tdp_level;
int max_tdp_level;
/* emulate context */
......@@ -827,6 +783,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
/* Host CPU on which VM-entry was most recently attempted */
unsigned int last_vmentry_cpu;
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
};
......@@ -1083,7 +1042,7 @@ struct kvm_x86_ops {
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
bool (*has_emulated_msr)(u32 index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
......@@ -1098,7 +1057,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
......@@ -1174,10 +1133,10 @@ struct kvm_x86_ops {
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
int pgd_level);
bool (*has_wbinvd_exit)(void);
......@@ -1220,7 +1179,6 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
......@@ -1281,6 +1239,7 @@ struct kvm_x86_nested_ops {
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
......@@ -1304,7 +1263,7 @@ struct kvm_arch_async_pf {
};
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
#define __KVM_HAVE_ARCH_VM_ALLOC
......@@ -1549,20 +1508,8 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
bool skip_mmu_sync);
void kvm_configure_mmu(bool enable_tdp, int tdp_page_level);
static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
return gpa;
}
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
return (struct kvm_mmu_page *)page_private(page);
}
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
int tdp_huge_page_level);
static inline u16 kvm_read_ldt(void)
{
......@@ -1636,7 +1583,15 @@ asmlinkage void kvm_spurious_fault(void);
insn "\n\t" \
"jmp 668f \n\t" \
"667: \n\t" \
"1: \n\t" \
".pushsection .discard.instr_begin \n\t" \
".long 1b - . \n\t" \
".popsection \n\t" \
"call kvm_spurious_fault \n\t" \
"1: \n\t" \
".pushsection .discard.instr_end \n\t" \
".long 1b - . \n\t" \
".popsection \n\t" \
"668: \n\t" \
_ASM_EXTABLE(666b, 667b)
......
......@@ -4,6 +4,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
......@@ -18,7 +19,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
#endif /* CONFIG_KVM_GUEST */
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_KVM_TYPES_H
#define _ASM_X86_KVM_TYPES_H
#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
#endif /* _ASM_X86_KVM_TYPES_H */
......@@ -32,6 +32,7 @@ extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __pv_init_lock_hash(void);
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
extern bool nopvspin;
#define queued_spin_unlock queued_spin_unlock
/**
......
......@@ -7,8 +7,11 @@
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*/
#define pr_fmt(fmt) "kvm-guest: " fmt
#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
......@@ -232,16 +235,11 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
u32 reason = kvm_read_and_reset_apf_flags();
u32 flags = kvm_read_and_reset_apf_flags();
irqentry_state_t state;
switch (reason) {
case KVM_PV_REASON_PAGE_NOT_PRESENT:
case KVM_PV_REASON_PAGE_READY:
break;
default:
if (!flags)
return false;
}
state = irqentry_enter(regs);
instrumentation_begin();
......@@ -254,13 +252,13 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
panic("Host injected async #PF in interrupt disabled region\n");
if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
if (unlikely(!(user_mode(regs))))
panic("Host injected async #PF in kernel mode\n");
/* Page is swapped out by the host. */
kvm_async_pf_task_wait_schedule(token);
} else {
kvm_async_pf_task_wake(token);
WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
}
instrumentation_end();
......@@ -268,6 +266,27 @@ noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
return true;
}
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
{
struct pt_regs *old_regs = set_irq_regs(regs);
u32 token;
irqentry_state_t state;
state = irqentry_enter(regs);
inc_irq_stat(irq_hv_callback_count);
if (__this_cpu_read(apf_reason.enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
}
irqentry_exit(regs, state);
set_irq_regs(old_regs);
}
static void __init paravirt_ops_setup(void)
{
pv_info.name = "KVM";
......@@ -289,8 +308,8 @@ static void kvm_register_steal_time(void)
return;
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
pr_info("kvm-stealtime: cpu %d, msr %llx\n",
cpu, (unsigned long long) slow_virt_to_phys(st));
pr_info("stealtime: cpu %d, msr %llx\n", cpu,
(unsigned long long) slow_virt_to_phys(st));
}
static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
......@@ -311,17 +330,19 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
pa |= KVM_ASYNC_PF_ENABLED;
pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
......@@ -493,7 +514,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
min = max = apic_id;
ipi_bitmap = 0;
}
......@@ -503,7 +525,8 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
if (ipi_bitmap) {
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
ret);
}
local_irq_restore(flags);
......@@ -533,7 +556,7 @@ static void kvm_setup_pv_ipi(void)
{
apic->send_IPI_mask = kvm_send_ipi_mask;
apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
pr_info("KVM setup pv IPIs\n");
pr_info("setup PV IPIs\n");
}
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
......@@ -551,13 +574,6 @@ static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
}
}
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
static_branch_disable(&virt_spin_lock_key);
}
static void __init kvm_smp_prepare_boot_cpu(void)
{
/*
......@@ -646,19 +662,20 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
static_branch_enable(&kvm_async_pf_enabled);
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
}
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
pr_info("setup PV sched yield\n");
}
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
pr_err("failed to install cpu hotplug callbacks\n");
#else
sev_map_percpu_data();
kvm_guest_cpu_init();
......@@ -854,16 +871,36 @@ asm(
*/
void __init kvm_spinlock_init(void)
{
/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
/*
* In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
* advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
* preferred over native qspinlock when vCPU is preempted.
*/
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
pr_info("PV spinlocks disabled, no host support\n");
return;
}
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;
/*
* Disable PV spinlocks and use native qspinlock when dedicated pCPUs
* are available.
*/
if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
goto out;
}
/* Don't use the pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
return;
if (num_possible_cpus() == 1) {
pr_info("PV spinlocks disabled, single CPU\n");
goto out;
}
if (nopvspin) {
pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
goto out;
}
pr_info("PV spinlocks enabled\n");
__pv_init_lock_hash();
pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
......@@ -876,6 +913,13 @@ void __init kvm_spinlock_init(void)
pv_ops.lock.vcpu_is_preempted =
PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
}
/*
* When PV spinlock is enabled which is preferred over
* virt_spin_lock(), virt_spin_lock_key's value is meaningless.
* Just disable it anyway.
*/
out:
static_branch_disable(&virt_spin_lock_key);
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
......@@ -895,8 +939,8 @@ static void kvm_enable_host_haltpoll(void *i)
void arch_haltpoll_enable(unsigned int cpu)
{
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
pr_err_once("kvm: host does not support poll control\n");
pr_err_once("kvm: host upgrade recommended\n");
pr_err_once("host does not support poll control\n");
pr_err_once("host upgrade recommended\n");
return;
}
......
......@@ -54,28 +54,38 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
return 0;
/*
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
return -EINVAL;
}
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
return 0;
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best) {
/* Update OSXSAVE bit */
if (boot_cpu_has(X86_FEATURE_XSAVE))
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
cpuid_entry_change(best, X86_FEATURE_APIC,
cpuid_entry_change(best, X86_FEATURE_APIC,
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
if (apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
best = kvm_find_cpuid_entry(vcpu, 7, 0);
......@@ -84,31 +94,14 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best) {
vcpu->arch.guest_supported_xcr0 = 0;
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
/*
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
return -EINVAL;
}
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
......@@ -121,14 +114,39 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
}
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
else
apic->lapic_timer.timer_mode_mask = 1 << 17;
kvm_apic_set_version(vcpu);
}
best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
if (!best)
vcpu->arch.guest_supported_xcr0 = 0;
else
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
/* Note, maxphyaddr must be updated before tdp_level. */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu);
kvm_mmu_reset_context(vcpu);
kvm_pmu_refresh(vcpu);
return 0;
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
kvm_x86_ops.update_exception_bitmap(vcpu);
}
static int is_efer_nx(void)
......@@ -203,10 +221,16 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries[i].padding[2] = 0;
}
vcpu->arch.cpuid_nent = cpuid->nent;
r = kvm_check_cpuid(vcpu);
if (r) {
vcpu->arch.cpuid_nent = 0;
kvfree(cpuid_entries);
goto out;
}
cpuid_fix_nx_cap(vcpu);
kvm_apic_set_version(vcpu);
kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
kvfree(cpuid_entries);
out:
......@@ -227,9 +251,14 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops.cpuid_update(vcpu);
r = kvm_update_cpuid(vcpu);
r = kvm_check_cpuid(vcpu);
if (r) {
vcpu->arch.cpuid_nent = 0;
goto out;
}
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
out:
return r;
}
......@@ -604,7 +633,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
eax.split.bit_width = cap.bit_width_gp;
eax.split.mask_length = cap.events_mask_len;
edx.split.num_counters_fixed = cap.num_counters_fixed;
edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
edx.split.bit_width_fixed = cap.bit_width_fixed;
edx.split.reserved = 0;
......
......@@ -9,7 +9,7 @@
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
......
......@@ -354,7 +354,6 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *feat;
u32 v = APIC_VERSION;
if (!lapic_in_kernel(vcpu))
......@@ -367,8 +366,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
......@@ -2068,7 +2066,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
kvm_lapic_set_reg(apic, APIC_TDCR, val);
kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
apic->lapic_timer.period) {
......@@ -2085,7 +2083,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_SELF_IPI:
if (apic_x2apic_mode(apic)) {
kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
kvm_lapic_reg_write(apic, APIC_ICR,
APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
} else
ret = 1;
break;
......@@ -2232,7 +2231,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
kvm_update_cpuid(vcpu);
kvm_update_cpuid_runtime(vcpu);
if (!apic)
return;
......
......@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "cpuid.h"
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
......@@ -57,22 +58,14 @@ void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer);
void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
gpa_t nested_cr3);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
kvm->arch.n_used_mmu_pages;
return 0;
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
......@@ -97,9 +90,13 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
kvm_x86_ops.load_mmu_pgd(vcpu, vcpu->arch.mmu->root_hpa |
kvm_get_active_pcid(vcpu));
u64 root_hpa = vcpu->arch.mmu->root_hpa;
if (!VALID_PAGE(root_hpa))
return;
kvm_x86_ops.load_mmu_pgd(vcpu, root_hpa | kvm_get_active_pcid(vcpu),
vcpu->arch.mmu->shadow_root_level);
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
......@@ -158,6 +155,11 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
static inline bool kvm_mmu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
}
/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
......@@ -218,11 +220,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
......
This diff is collapsed.
......@@ -45,7 +45,7 @@ static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
!is_last_spte(ent[i], level)) {
struct kvm_mmu_page *child;
child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
child = to_shadow_page(ent[i] & PT64_BASE_ADDR_MASK);
__mmu_spte_walk(vcpu, child, fn, level - 1);
}
}
......@@ -62,7 +62,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
sp = to_shadow_page(root);
__mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
return;
}
......@@ -72,7 +72,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
sp = to_shadow_page(root);
__mmu_spte_walk(vcpu, sp, fn, 2);
}
}
......@@ -97,7 +97,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
kvm_pfn_t pfn;
hpa_t hpa;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (sp->unsync) {
if (level != PG_LEVEL_4K) {
......@@ -132,7 +132,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
struct kvm_memory_slot *slot;
gfn_t gfn;
rev_sp = page_header(__pa(sptep));
rev_sp = sptep_to_sp(sptep);
gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
slots = kvm_memslots_for_spte_role(kvm, rev_sp->role);
......@@ -165,7 +165,7 @@ static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
struct kvm_mmu_page *sp = page_header(__pa(sptep));
struct kvm_mmu_page *sp = sptep_to_sp(sptep);
if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_MMU_INTERNAL_H
#define __KVM_X86_MMU_INTERNAL_H
#include <linux/types.h>
#include <asm/kvm_host.h>
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
struct list_head lpage_disallowed_link;
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
union kvm_mmu_page_role role;
gfn_t gfn;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
/*
* Used out of the mmu-lock to avoid reading spte values while an
* update is in progress; see the comments in __get_spte_lockless().
*/
int clear_spte_count;
#endif
/* Number of writes since the last time traversal visited this page. */
atomic_t write_flooding_count;
};
static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
return (struct kvm_mmu_page *)page_private(page);
}
static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
{
return to_shadow_page(__pa(sptep));
}
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
#endif /* __KVM_X86_MMU_INTERNAL_H */
......@@ -387,7 +387,7 @@ TRACE_EVENT(
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_PATH mmu
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE mmutrace
......
......@@ -16,7 +16,7 @@
#include <asm/kvm_page_track.h>
#include "mmu.h"
#include "mmu_internal.h"
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
......
......@@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
!(pte & PT_GUEST_DIRTY_MASK)) {
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
#if PTTYPE == PTTYPE_EPT
if (kvm_arch_write_log_dirty(vcpu, addr))
if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr))
return -EINVAL;
#endif
pte |= PT_GUEST_DIRTY_MASK;
......@@ -596,7 +596,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
u64 *spte;
int i;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (sp->role.level > PG_LEVEL_4K)
return;
......@@ -789,10 +789,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
/*
* If PFEC.RSVD is set, this is a shadow page fault.
* The bit needs to be cleared before walking guest page tables.
......@@ -820,6 +816,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
return RET_PF_EMULATE;
}
r = mmu_topup_memory_caches(vcpu, true);
if (r)
return r;
vcpu->arch.write_fault_to_shadow_pgtable = false;
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
......@@ -866,7 +866,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
if (make_mmu_pages_available(vcpu) < 0)
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn,
map_writable, prefault, lpage_disallowed);
......@@ -903,7 +904,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
* No need to check return value here, rmap_can_add() can
* help us to skip pte prefetch later.
*/
mmu_topup_memory_caches(vcpu);
mmu_topup_memory_caches(vcpu, true);
if (!VALID_PAGE(root_hpa)) {
WARN_ON(1);
......@@ -915,7 +916,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
level = iterator.level;
sptep = iterator.sptep;
sp = page_header(__pa(sptep));
sp = sptep_to_sp(sptep);
if (is_last_spte(*sptep, level)) {
pt_element_t gpte;
gpa_t pte_gpa;
......
......@@ -372,6 +372,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
if (!pmc)
return 1;
if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
(kvm_x86_ops.get_cpl(vcpu) != 0) &&
(kvm_read_cr0(vcpu) & X86_CR0_PE))
return 1;
*data = pmc_read_counter(pmc) & mask;
return 0;
}
......
......@@ -15,6 +15,8 @@
#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
#define MAX_FIXED_COUNTERS 3
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
......
......@@ -665,7 +665,7 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
} else {
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}
mark_dirty(vmcb, VMCB_AVIC);
vmcb_mark_dirty(vmcb, VMCB_AVIC);
svm_set_pi_irte_mode(vcpu, activated);
}
......
......@@ -48,13 +48,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
svm->vmcb->control.exit_info_1 |= fault->error_code;
/*
* The present bit is always zero for page structure faults on real
* hardware.
*/
if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
svm->vmcb->control.exit_info_1 &= ~1;
nested_svm_vmexit(svm);
}
......@@ -87,11 +80,11 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
WARN_ON(mmu_is_nested(vcpu));
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer);
kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer,
svm->nested.ctl.nested_cr3);
vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level;
reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
......@@ -106,7 +99,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h, *g;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
if (!is_guest_mode(&svm->vcpu))
return;
......@@ -222,8 +215,9 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
return true;
}
static bool nested_vmcb_checks(struct vmcb *vmcb)
static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb)
{
bool nested_vmcb_lma;
if ((vmcb->save.efer & EFER_SVME) == 0)
return false;
......@@ -231,6 +225,30 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
(vmcb->save.cr0 & X86_CR0_NW))
return false;
if (!kvm_dr6_valid(vmcb->save.dr6) || !kvm_dr7_valid(vmcb->save.dr7))
return false;
nested_vmcb_lma =
(vmcb->save.efer & EFER_LME) &&
(vmcb->save.cr0 & X86_CR0_PG);
if (!nested_vmcb_lma) {
if (vmcb->save.cr4 & X86_CR4_PAE) {
if (vmcb->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
return false;
} else {
if (vmcb->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
return false;
}
} else {
if (!(vmcb->save.cr4 & X86_CR4_PAE) ||
!(vmcb->save.cr0 & X86_CR0_PE) ||
(vmcb->save.cr3 & MSR_CR3_LONG_RESERVED_MASK))
return false;
}
if (kvm_valid_cr4(&svm->vcpu, vmcb->save.cr4))
return false;
return nested_vmcb_check_controls(&vmcb->control);
}
......@@ -258,7 +276,7 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm)
/* Only a few fields of int_ctl are written by the processor. */
mask = V_IRQ_MASK | V_TPR_MASK;
if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
is_intercept(svm, INTERCEPT_VINTR)) {
svm_is_intercept(svm, INTERCEPT_VINTR)) {
/*
* In order to request an interrupt window, L0 is usurping
* svm->vmcb->control.int_ctl and possibly setting V_IRQ
......@@ -310,6 +328,42 @@ static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
nested_vmcb->control.exit_int_info = exit_int_info;
}
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
/*
* Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
* if we are emulating VM-Entry into a guest with NPT enabled.
*/
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_npt)
{
if (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))
return -EINVAL;
if (!nested_npt && is_pae_paging(vcpu) &&
(cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return -EINVAL;
}
/*
* TODO: optimize unconditional TLB flush/MMU sync here and in
* kvm_init_shadow_npt_mmu().
*/
if (!nested_npt)
kvm_mmu_new_pgd(vcpu, cr3, false, false);
vcpu->arch.cr3 = cr3;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
kvm_init_mmu(vcpu, false);
return 0;
}
static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
{
/* Load the nested guest state */
......@@ -323,8 +377,6 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_v
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
......@@ -342,13 +394,9 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_v
static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
{
const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE)
nested_svm_init_mmu_context(&svm->vcpu);
/* Guest paging mode is active - reset mmu */
kvm_mmu_reset_context(&svm->vcpu);
svm_flush_tlb(&svm->vcpu);
if (nested_npt_enabled(svm))
nested_svm_init_mmu_context(&svm->vcpu);
svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
......@@ -375,18 +423,27 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
*/
recalc_intercepts(svm);
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb)
{
int ret;
svm->nested.vmcb = vmcb_gpa;
load_nested_vmcb_control(svm, &nested_vmcb->control);
nested_prepare_vmcb_save(svm, nested_vmcb);
nested_prepare_vmcb_control(svm);
ret = nested_svm_load_cr3(&svm->vcpu, nested_vmcb->save.cr3,
nested_npt_enabled(svm));
if (ret)
return ret;
svm_set_gif(svm, true);
return 0;
}
int nested_svm_vmrun(struct vcpu_svm *svm)
......@@ -416,7 +473,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
if (!nested_vmcb_checks(svm, nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
nested_vmcb->control.exit_code_hi = 0;
nested_vmcb->control.exit_info_1 = 0;
......@@ -464,16 +521,22 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(&hsave->control, &vmcb->control);
svm->nested.nested_run_pending = 1;
enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb);
if (!nested_svm_vmrun_msrpm(svm)) {
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
if (enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb))
goto out_exit_err;
nested_svm_vmexit(svm);
}
if (nested_svm_vmrun_msrpm(svm))
goto out;
out_exit_err:
svm->nested.nested_run_pending = 0;
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
nested_svm_vmexit(svm);
out:
kvm_vcpu_unmap(&svm->vcpu, &map, true);
......@@ -585,12 +648,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm_set_efer(&svm->vcpu, hsave->save.efer);
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
if (npt_enabled) {
svm->vmcb->save.cr3 = hsave->save.cr3;
svm->vcpu.arch.cr3 = hsave->save.cr3;
} else {
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
}
kvm_rax_write(&svm->vcpu, hsave->save.rax);
kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
kvm_rip_write(&svm->vcpu, hsave->save.rip);
......@@ -598,7 +655,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
mark_all_dirty(svm->vmcb);
vmcb_mark_all_dirty(svm->vmcb);
trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code,
nested_vmcb->control.exit_info_1,
......@@ -610,8 +667,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
rc = nested_svm_load_cr3(&svm->vcpu, hsave->save.cr3, false);
if (rc)
return 1;
if (npt_enabled)
svm->vmcb->save.cr3 = hsave->save.cr3;
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
......
......@@ -313,13 +313,15 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
int write)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
unsigned long npages, npinned, size;
unsigned long npages, size;
int npinned;
unsigned long locked, lock_limit;
struct page **pages;
unsigned long first, last;
int ret;
if (ulen == 0 || uaddr + ulen < uaddr)
return NULL;
return ERR_PTR(-EINVAL);
/* Calculate number of pages. */
first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
......@@ -330,9 +332,12 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
return NULL;
return ERR_PTR(-ENOMEM);
}
if (WARN_ON_ONCE(npages > INT_MAX))
return ERR_PTR(-EINVAL);
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
......@@ -341,12 +346,13 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
if (!pages)
return NULL;
return ERR_PTR(-ENOMEM);
/* Pin the user virtual address. */
npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
if (npinned != npages) {
pr_err("SEV: Failure locking %lu pages.\n", npages);
ret = -ENOMEM;
goto err;
}
......@@ -357,10 +363,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
err:
if (npinned > 0)
release_pages(pages, npinned);
unpin_user_pages(pages, npinned);
kvfree(pages);
return NULL;
return ERR_PTR(ret);
}
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
......@@ -368,7 +374,7 @@ static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
release_pages(pages, npages);
unpin_user_pages(pages, npages);
kvfree(pages);
sev->pages_locked -= npages;
}
......@@ -434,8 +440,8 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Lock the user memory. */
inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
if (!inpages) {
ret = -ENOMEM;
if (IS_ERR(inpages)) {
ret = PTR_ERR(inpages);
goto e_free;
}
......@@ -789,13 +795,13 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
/* lock userspace source and destination page */
src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
if (!src_p)
return -EFAULT;
if (IS_ERR(src_p))
return PTR_ERR(src_p);
dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
if (!dst_p) {
if (IS_ERR(dst_p)) {
sev_unpin_memory(kvm, src_p, n);
return -EFAULT;
return PTR_ERR(dst_p);
}
/*
......@@ -860,8 +866,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
return -EFAULT;
pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
if (!pages)
return -ENOMEM;
if (IS_ERR(pages))
return PTR_ERR(pages);
/*
* The secret must be copied into contiguous memory region, lets verify
......@@ -987,8 +993,8 @@ int svm_register_enc_region(struct kvm *kvm,
return -ENOMEM;
region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
if (!region->pages) {
ret = -ENOMEM;
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
goto e_free;
}
......@@ -1180,11 +1186,10 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
* 2) or this VMCB was executed on different host CPU in previous VMRUNs.
*/
if (sd->sev_vmcbs[asid] == svm->vmcb &&
svm->last_cpu == cpu)
svm->vcpu.arch.last_vmentry_cpu == cpu)
return;
svm->last_cpu = cpu;
sd->sev_vmcbs[asid] = svm->vmcb;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
mark_dirty(svm->vmcb, VMCB_ASID);
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}
This diff is collapsed.
......@@ -81,7 +81,7 @@ struct kvm_svm {
struct kvm_vcpu;
struct nested_state {
struct svm_nested_state {
struct vmcb *hsave;
u64 hsave_msr;
u64 vm_cr_msr;
......@@ -133,7 +133,7 @@ struct vcpu_svm {
ulong nmi_iret_rip;
struct nested_state nested;
struct svm_nested_state nested;
bool nmi_singlestep;
u64 nmi_singlestep_guest_rflags;
......@@ -158,9 +158,6 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
/* which host CPU was used for running this vcpu */
unsigned int last_cpu;
};
struct svm_cpu_data {
......@@ -188,18 +185,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
}
static inline void mark_all_dirty(struct vmcb *vmcb)
static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
}
static inline void mark_all_clean(struct vmcb *vmcb)
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
{
vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
static inline void mark_dirty(struct vmcb *vmcb, int bit)
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
}
......@@ -293,7 +290,7 @@ static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline void set_intercept(struct vcpu_svm *svm, int bit)
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
......@@ -302,7 +299,7 @@ static inline void set_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline void clr_intercept(struct vcpu_svm *svm, int bit)
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
......@@ -311,7 +308,7 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit)
recalc_intercepts(svm);
}
static inline bool is_intercept(struct vcpu_svm *svm, int bit)
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
{
return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
}
......@@ -346,7 +343,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
}
/* svm.c */
#define MSR_INVALID 0xffffffffU
#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U
#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U
#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U
#define MSR_INVALID 0xffffffffU
u32 svm_msrpm_offset(u32 msr);
void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
......@@ -365,7 +365,7 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
......@@ -387,8 +387,8 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI));
}
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb);
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb);
void svm_leave_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
......@@ -420,7 +420,7 @@ extern int avic;
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
mark_dirty(svm->vmcb, VMCB_AVIC);
vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
}
static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
......
......@@ -27,7 +27,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
.text
.section .noinstr.text, "ax"
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
......
This diff is collapsed.
......@@ -146,7 +146,9 @@ do { \
: : op1 : "cc" : error, fault); \
return; \
error: \
instrumentation_begin(); \
insn##_error(error_args); \
instrumentation_end(); \
return; \
fault: \
kvm_spurious_fault(); \
......@@ -161,7 +163,9 @@ do { \
: : op1, op2 : "cc" : error, fault); \
return; \
error: \
instrumentation_begin(); \
insn##_error(error_args); \
instrumentation_end(); \
return; \
fault: \
kvm_spurious_fault(); \
......
......@@ -180,9 +180,6 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
ret = pmu->version > 1;
break;
case MSR_IA32_PERF_CAPABILITIES:
ret = 1;
break;
default:
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
......@@ -224,12 +221,6 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = pmu->global_ovf_ctrl;
return 0;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
return 1;
msr_info->data = vcpu->arch.perf_capabilities;
return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
......@@ -289,14 +280,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated)
return 1;
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
(data & ~vmx_get_perf_capabilities()) : data)
return 1;
vcpu->arch.perf_capabilities = data;
return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
......
......@@ -27,7 +27,7 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
.text
.section .noinstr.text, "ax"
/**
* vmx_vmenter - VM-Enter the current loaded VMCS
......@@ -234,6 +234,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
jmp 1b
SYM_FUNC_END(__vmx_vcpu_run)
.section .text, "ax"
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -114,9 +114,8 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
*/
void __init xen_init_spinlocks(void)
{
/* Don't need to use pvqspinlock code if there is only 1 vCPU. */
if (num_possible_cpus() == 1)
if (num_possible_cpus() == 1 || nopvspin)
xen_pvspin = false;
if (!xen_pvspin) {
......@@ -137,6 +136,7 @@ void __init xen_init_spinlocks(void)
static __init int xen_parse_nopvspin(char *arg)
{
pr_notice("\"xen_nopvspin\" is deprecated, please use \"nopvspin\" instead\n");
xen_pvspin = false;
return 0;
}
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_KVM_TYPES_H
#define _ASM_GENERIC_KVM_TYPES_H
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment