Commit bfe91da2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Bugfixes and a one-liner patch to silence a sparse warning"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: arm64: Stop clobbering x0 for HVC_SOFT_RESTART
  KVM: arm64: PMU: Fix per-CPU access in preemptible context
  KVM: VMX: Use KVM_POSSIBLE_CR*_GUEST_BITS to initialize guest/host masks
  KVM: x86: Mark CR4.TSD as being possibly owned by the guest
  KVM: x86: Inject #GP if guest attempts to toggle CR4.LA57 in 64-bit mode
  kvm: use more precise cast and do not drop __user
  KVM: x86: bit 8 of non-leaf PDPEs is not reserved
  KVM: X86: Fix async pf caused null-ptr-deref
  KVM: arm64: vgic-v4: Plug race between non-residency and v4.1 doorbell
  KVM: arm64: pvtime: Ensure task delay accounting is enabled
  KVM: arm64: Fix kvm_reset_vcpu() return code being incorrect with SVE
  KVM: arm64: Annotate hyp NMI-related functions as __always_inline
  KVM: s390: reduce number of IO pins to 1
parents 5c82ec00 8038a922
......@@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void)
return read_sysreg_s(SYS_ICC_PMR_EL1);
}
static inline void gic_write_pmr(u32 val)
static __always_inline void gic_write_pmr(u32 val)
{
write_sysreg_s(val, SYS_ICC_PMR_EL1);
}
......
......@@ -675,7 +675,7 @@ static inline bool system_supports_generic_auth(void)
cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
}
static inline bool system_uses_irq_prio_masking(void)
static __always_inline bool system_uses_irq_prio_masking(void)
{
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
......
......@@ -136,11 +136,15 @@ SYM_CODE_START(__kvm_handle_stub_hvc)
1: cmp x0, #HVC_RESET_VECTORS
b.ne 1f
reset:
/*
* Reset kvm back to the hyp stub. Do not clobber x0-x4 in
* case we coming via HVC_SOFT_RESTART.
* Set the HVC_RESET_VECTORS return code before entering the common
* path so that we do not clobber x0-x2 in case we are coming via
* HVC_SOFT_RESTART.
*/
mov x0, xzr
reset:
/* Reset kvm back to the hyp stub. */
mrs x5, sctlr_el2
mov_q x6, SCTLR_ELx_FLAGS
bic x5, x5, x6 // Clear SCTL_M and etc
......@@ -151,7 +155,6 @@ reset:
/* Install stub vectors */
adr_l x5, __hyp_stub_vectors
msr vbar_el2, x5
mov x0, xzr
eret
1: /* Bad stub call */
......
......@@ -159,7 +159,10 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
}
/*
* On VHE ensure that only guest events have EL0 counting enabled
* On VHE ensure that only guest events have EL0 counting enabled.
* This is called from both vcpu_{load,put} and the sysreg handling.
* Since the latter is preemptible, special care must be taken to
* disable preemption.
*/
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
......@@ -169,12 +172,14 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
if (!has_vhe())
return;
preempt_disable();
host = this_cpu_ptr(&kvm_host_data);
events_guest = host->pmu_events.events_guest;
events_host = host->pmu_events.events_host;
kvm_vcpu_pmu_enable_el0(events_guest);
kvm_vcpu_pmu_disable_el0(events_host);
preempt_enable();
}
/*
......
......@@ -3,6 +3,7 @@
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/sched/stat.h>
#include <asm/kvm_mmu.h>
#include <asm/pvclock-abi.h>
......@@ -73,6 +74,11 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
return base;
}
static bool kvm_arm_pvtime_supported(void)
{
return !!sched_info_on();
}
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
......@@ -82,7 +88,8 @@ int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
int ret = 0;
int idx;
if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
if (!kvm_arm_pvtime_supported() ||
attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
return -ENXIO;
if (get_user(ipa, user))
......@@ -110,7 +117,8 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
u64 __user *user = (u64 __user *)attr->addr;
u64 ipa;
if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
if (!kvm_arm_pvtime_supported() ||
attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
return -ENXIO;
ipa = vcpu->arch.steal.base;
......@@ -125,7 +133,8 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
{
switch (attr->attr) {
case KVM_ARM_VCPU_PVTIME_IPA:
return 0;
if (kvm_arm_pvtime_supported())
return 0;
}
return -ENXIO;
}
......@@ -245,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
int ret = -EINVAL;
int ret;
bool loaded;
u32 pstate;
......@@ -269,15 +269,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
if (kvm_vcpu_enable_ptrauth(vcpu))
if (kvm_vcpu_enable_ptrauth(vcpu)) {
ret = -EINVAL;
goto out;
}
}
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
ret = -EINVAL;
goto out;
}
pstate = VCPU_RESET_PSTATE_SVC;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
......
......@@ -90,7 +90,15 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
disable_irq_nosync(irq);
/*
* The v4.1 doorbell can fire concurrently with the vPE being
* made non-resident. Ensure we only update pending_last
* *after* the non-residency sequence has completed.
*/
raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
......
......@@ -31,12 +31,12 @@
#define KVM_USER_MEM_SLOTS 32
/*
* These seem to be used for allocating ->chip in the routing table,
* which we don't use. 4096 is an out-of-thin-air value. If we need
* to look at ->chip later on, we'll need to revisit this.
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
* need to look at ->chip later on, we'll need to revisit this.
*/
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096
#define KVM_IRQCHIP_NUM_PINS 1
#define KVM_HALT_POLL_NS_DEFAULT 50000
/* s390-specific vcpu->requests bit members */
......
......@@ -7,7 +7,7 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
......
......@@ -4449,7 +4449,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
nonleaf_bit8_rsvd | gbpages_bit_rsvd |
gbpages_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
......
......@@ -4109,7 +4109,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
* (KVM doesn't change it);
*/
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vmx_set_cr0(vcpu, vmcs12->host_cr0);
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
......@@ -4259,7 +4259,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
*/
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
......
......@@ -133,9 +133,6 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
......@@ -4034,9 +4031,9 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{
vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
if (enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
vmx->vcpu.arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS;
if (!enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits &= ~X86_CR4_PGE;
if (is_guest_mode(&vmx->vcpu))
vmx->vcpu.arch.cr4_guest_owned_bits &=
~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
......@@ -4333,8 +4330,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
/* 22.2.1, 20.8.1 */
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
set_cr4_guest_host_mask(vmx);
......
......@@ -975,6 +975,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
if ((cr4 ^ old_cr4) & X86_CR4_LA57)
return 1;
} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
&& ((cr4 ^ old_cr4) & pdptr_bits)
&& !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
......@@ -2693,6 +2695,9 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
if (data & 0x30)
return 1;
if (!lapic_in_kernel(vcpu))
return 1;
vcpu->arch.apf.msr_en_val = data;
if (!kvm_pv_async_pf_enabled(vcpu)) {
......
......@@ -4054,16 +4054,24 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
u64 val;
if (info->req_db) {
unsigned long flags;
/*
* vPE is going to block: make the vPE non-resident with
* PendingLast clear and DB set. The GIC guarantees that if
* we read-back PendingLast clear, then a doorbell will be
* delivered when an interrupt comes.
*
* Note the locking to deal with the concurrent update of
* pending_last from the doorbell interrupt handler that can
* run concurrently.
*/
raw_spin_lock_irqsave(&vpe->vpe_lock, flags);
val = its_clear_vpend_valid(vlpi_base,
GICR_VPENDBASER_PendingLast,
GICR_VPENDBASER_4_1_DB);
vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
} else {
/*
* We're not blocking, so just make the vPE non-resident
......
......@@ -3350,7 +3350,8 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
if (kvm_sigmask.len != sizeof(compat_sigset_t))
goto out;
r = -EFAULT;
if (get_compat_sigset(&sigset, (void *)sigmask_arg->sigset))
if (get_compat_sigset(&sigset,
(compat_sigset_t __user *)sigmask_arg->sigset))
goto out;
r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
} else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment