Commit fe37fe2a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix dangling references to a redistributor region if the vgic was
     prematurely destroyed.

   - Properly mark FFA buffers as released, ensuring that both parties
     can make forward progress.

  x86:

   - Allow getting/setting MSRs for SEV-ES guests, if they're using the
     pre-6.9 KVM_SEV_ES_INIT API.

   - Always sync pending posted interrupts to the IRR prior to IOAPIC
     route updates, so that EOIs are intercepted properly if the old
     routing table requested that.

  Generic:

   - Avoid __fls(0)

   - Fix reference leak on hwpoisoned page

   - Fix a race in kvm_vcpu_on_spin() by ensuring loads and stores are
     atomic.

   - Fix bug in __kvm_handle_hva_range() where KVM calls a function
     pointer that was intended to be a marker only (nothing bad happens
     but kind of a mine and also technically undefined behavior)

   - Do not bother accounting allocations that are small and freed
     before getting back to userspace.

  Selftests:

   - Fix compilation for RISC-V.

   - Fix a "shift too big" goof in the KVM_SEV_INIT2 selftest.

   - Compute the max mappable gfn for KVM selftests on x86 using
     GuestMaxPhyAddr from KVM's supported CPUID (if it's available)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SEV-ES: Fix svm_get_msr()/svm_set_msr() for KVM_SEV_ES_INIT guests
  KVM: Discard zero mask with function kvm_dirty_ring_reset
  virt: guest_memfd: fix reference leak on hwpoisoned page
  kvm: do not account temporary allocations to kmem
  MAINTAINERS: Drop Wanpeng Li as a Reviewer for KVM Paravirt support
  KVM: x86: Always sync PIR to IRR prior to scanning I/O APIC routes
  KVM: Stop processing *all* memslots when "null" mmu_notifier handler is found
  KVM: arm64: FFA: Release hyp rx buffer
  KVM: selftests: Fix RISC-V compilation
  KVM: arm64: Disassociate vcpus from redistributor region on teardown
  KVM: Fix a data race on last_boosted_vcpu in kvm_vcpu_on_spin()
  KVM: selftests: x86: Prioritize getting max_gfn from GuestPhysBits
  KVM: selftests: Fix shift of 32 bit unsigned int more than 32 bits
parents 35bb670d e159d63e
......@@ -12382,7 +12382,6 @@ F: drivers/video/backlight/ktz8866.c
KVM PARAVIRT (KVM/paravirt)
M: Paolo Bonzini <pbonzini@redhat.com>
R: Wanpeng Li <wanpengli@tencent.com>
R: Vitaly Kuznetsov <vkuznets@redhat.com>
L: kvm@vger.kernel.org
S: Supported
......
......@@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
static void ffa_rx_release(struct arm_smccc_res *res)
{
arm_smccc_1_1_smc(FFA_RX_RELEASE,
0, 0,
0, 0, 0, 0, 0,
res);
}
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
......@@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
......@@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
......
......@@ -391,7 +391,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
vgic_v3_free_redist_region(rdreg);
vgic_v3_free_redist_region(kvm, rdreg);
INIT_LIST_HEAD(&dist->rd_regions);
} else {
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
......
......@@ -919,8 +919,19 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
return ret;
}
void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg)
{
struct kvm_vcpu *vcpu;
unsigned long c;
lockdep_assert_held(&kvm->arch.config_lock);
/* Garbage collect the region */
kvm_for_each_vcpu(c, vcpu, kvm) {
if (vcpu->arch.vgic_cpu.rdreg == rdreg)
vcpu->arch.vgic_cpu.rdreg = NULL;
}
list_del(&rdreg->list);
kfree(rdreg);
}
......@@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
vgic_v3_free_redist_region(rdreg);
vgic_v3_free_redist_region(kvm, rdreg);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
......
......@@ -316,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
u32 index);
void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg);
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
......
......@@ -2843,7 +2843,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (sev_es_prevent_msr_access(vcpu, msr_info)) {
msr_info->data = 0;
return -EINVAL;
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
}
switch (msr_info->index) {
......@@ -2998,7 +2998,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
if (sev_es_prevent_msr_access(vcpu, msr))
return -EINVAL;
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
......
......@@ -10718,13 +10718,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
else if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
if (is_guest_mode(vcpu))
vcpu->arch.load_eoi_exitmap_pending = true;
......
......@@ -277,6 +277,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
......
......@@ -9,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "sbi.h"
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
......
......@@ -1247,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
unsigned long ht_gfn, max_gfn, max_pfn;
uint8_t maxphyaddr;
uint8_t maxphyaddr, guest_maxphyaddr;
max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
/*
* Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
* enumerates the max _mappable_ GPA, which can be less than the raw
* MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
* doesn't support 5-level TDP.
*/
guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
"Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
if (!host_cpu_is_amd)
......
......@@ -6,6 +6,7 @@
*
*/
#include "kvm_util.h"
#include "ucall_common.h"
#define LABEL_ADDRESS(v) ((uint64_t)&(v))
......
......@@ -15,6 +15,7 @@
#include "processor.h"
#include "sbi.h"
#include "arch_timer.h"
#include "ucall_common.h"
/* Maximum counters(firmware + hardware) */
#define RISCV_MAX_PMU_COUNTERS 64
......
......@@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features)
int i;
for (i = 0; i < 64; i++) {
if (!(supported_features & (1u << i)))
if (!(supported_features & BIT_ULL(i)))
test_init2_invalid(vm_type,
&(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
"unknown feature");
else if (KNOWN_FEATURES & (1u << i))
else if (KNOWN_FEATURES & BIT_ULL(i))
test_init2(vm_type,
&(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
}
......
......@@ -55,6 +55,9 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
struct kvm_memory_slot *memslot;
int as_id, id;
if (!mask)
return;
as_id = slot >> 16;
id = (u16)slot;
......
......@@ -510,8 +510,10 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
}
if (folio_test_hwpoison(folio)) {
folio_unlock(folio);
folio_put(folio);
r = -EHWPOISON;
goto out_unlock;
goto out_fput;
}
page = folio_file_page(folio, index);
......@@ -522,7 +524,6 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
r = 0;
out_unlock:
folio_unlock(folio);
out_fput:
fput(file);
......
......@@ -651,7 +651,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
range->on_lock(kvm);
if (IS_KVM_NULL_FN(range->handler))
break;
goto mmu_unlock;
}
r.ret |= range->handler(kvm, &gfn_range);
}
......@@ -660,6 +660,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
if (range->flush_on_ret && r.ret)
kvm_flush_remote_tlbs(kvm);
mmu_unlock:
if (r.found_memslot)
KVM_MMU_UNLOCK(kvm);
......@@ -4025,12 +4026,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
struct kvm_vcpu *vcpu;
int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
int last_boosted_vcpu;
unsigned long i;
int yielded = 0;
int try = 3;
int pass;
last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu);
kvm_vcpu_set_in_spin_loop(me, true);
/*
* We boost the priority of a VCPU that is runnable but not
......@@ -4068,7 +4070,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
yielded = kvm_vcpu_yield_to(vcpu);
if (yielded > 0) {
kvm->last_boosted_vcpu = i;
WRITE_ONCE(kvm->last_boosted_vcpu, i);
break;
} else if (yielded < 0) {
try--;
......@@ -4427,7 +4429,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_regs *kvm_regs;
r = -ENOMEM;
kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
if (!kvm_regs)
goto out;
r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
......@@ -4454,8 +4456,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_SREGS: {
kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
GFP_KERNEL_ACCOUNT);
kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
r = -ENOMEM;
if (!kvm_sregs)
goto out;
......@@ -4547,7 +4548,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
break;
}
case KVM_GET_FPU: {
fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
r = -ENOMEM;
if (!fpu)
goto out;
......@@ -6210,7 +6211,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
active = kvm_active_vms;
mutex_unlock(&kvm_lock);
env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
env = kzalloc(sizeof(*env), GFP_KERNEL);
if (!env)
return;
......@@ -6226,7 +6227,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
if (!IS_ERR(kvm->debugfs_dentry)) {
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
if (p) {
tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment