Commit 542380a2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a problem with GICv3 userspace save/restore
   - Clarify GICv2 userspace save/restore ABI
   - Be more careful in clearing GIC LRs
   - Add missing synchronization primitive to our MMU handling code

  PPC:
   - Check for a NULL return from kzalloc

  s390:
   - Prevent translation exception errors on valid page tables for the
     instruction-exection-protection support

  x86:
   - Fix Page-Modification Logging when running a nested guest"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl
  KVM: nVMX: initialize PML fields in vmcs02
  KVM: nVMX: do not leak PML full vmexit to L1
  KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI
  KVM: arm64: Ensure LRs are clear when they should be
  kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd
  KVM: s390: remove change-recording override support
  arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region
  arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm
parents 62fedca5 8786fa66
...@@ -83,6 +83,12 @@ Groups: ...@@ -83,6 +83,12 @@ Groups:
Bits for undefined preemption levels are RAZ/WI. Bits for undefined preemption levels are RAZ/WI.
For historical reasons and to provide ABI compatibility with userspace we
export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
field in the lower 5 bits of a word, meaning that userspace must always
use the lower 5 bits to communicate with the KVM device and must shift the
value left by 3 places to obtain the actual priority mask level.
Limitations: Limitations:
- Priorities are not implemented, and registers are RAZ/WI - Priorities are not implemented, and registers are RAZ/WI
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
......
...@@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void) ...@@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void)
if (__hyp_get_vectors() == hyp_default_vectors) if (__hyp_get_vectors() == hyp_default_vectors)
cpu_init_hyp_mode(NULL); cpu_init_hyp_mode(NULL);
} }
if (vgic_present)
kvm_vgic_init_cpu_hardware();
} }
static void cpu_hyp_reset(void) static void cpu_hyp_reset(void)
......
...@@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) ...@@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
phys_addr_t addr = start, end = start + size; phys_addr_t addr = start, end = start + size;
phys_addr_t next; phys_addr_t next;
assert_spin_locked(&kvm->mmu_lock);
pgd = kvm->arch.pgd + stage2_pgd_index(addr); pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do { do {
next = stage2_pgd_addr_end(addr, end); next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd)) if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next); unmap_stage2_puds(kvm, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end); } while (pgd++, addr = next, addr != end);
} }
...@@ -803,6 +810,7 @@ void stage2_unmap_vm(struct kvm *kvm) ...@@ -803,6 +810,7 @@ void stage2_unmap_vm(struct kvm *kvm)
int idx; int idx;
idx = srcu_read_lock(&kvm->srcu); idx = srcu_read_lock(&kvm->srcu);
down_read(&current->mm->mmap_sem);
spin_lock(&kvm->mmu_lock); spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm); slots = kvm_memslots(kvm);
...@@ -810,6 +818,7 @@ void stage2_unmap_vm(struct kvm *kvm) ...@@ -810,6 +818,7 @@ void stage2_unmap_vm(struct kvm *kvm)
stage2_unmap_memslot(kvm, memslot); stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock); spin_unlock(&kvm->mmu_lock);
up_read(&current->mm->mmap_sem);
srcu_read_unlock(&kvm->srcu, idx); srcu_read_unlock(&kvm->srcu, idx);
} }
...@@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm) ...@@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
if (kvm->arch.pgd == NULL) if (kvm->arch.pgd == NULL)
return; return;
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */ /* Free the HW pgd, one page at a time */
free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL; kvm->arch.pgd = NULL;
...@@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
(KVM_PHYS_SIZE >> PAGE_SHIFT)) (KVM_PHYS_SIZE >> PAGE_SHIFT))
return -EFAULT; return -EFAULT;
down_read(&current->mm->mmap_sem);
/* /*
* A memory region could potentially cover multiple VMAs, and any holes * A memory region could potentially cover multiple VMAs, and any holes
* between them, so iterate over all of them to find out if we can map * between them, so iterate over all of them to find out if we can map
...@@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
pa += vm_start - vma->vm_start; pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */ /* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
return -EINVAL; ret = -EINVAL;
goto out;
}
ret = kvm_phys_addr_ioremap(kvm, gpa, pa, ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start, vm_end - vm_start,
...@@ -1857,7 +1872,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -1857,7 +1872,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
} while (hva < reg_end); } while (hva < reg_end);
if (change == KVM_MR_FLAGS_ONLY) if (change == KVM_MR_FLAGS_ONLY)
return ret; goto out;
spin_lock(&kvm->mmu_lock); spin_lock(&kvm->mmu_lock);
if (ret) if (ret)
...@@ -1865,6 +1880,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -1865,6 +1880,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
else else
stage2_flush_memslot(kvm, memslot); stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock); spin_unlock(&kvm->mmu_lock);
out:
up_read(&current->mm->mmap_sem);
return ret; return ret;
} }
......
...@@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ...@@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
/* start new resize */ /* start new resize */
resize = kzalloc(sizeof(*resize), GFP_KERNEL); resize = kzalloc(sizeof(*resize), GFP_KERNEL);
if (!resize) {
ret = -ENOMEM;
goto out;
}
resize->order = shift; resize->order = shift;
resize->kvm = kvm; resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work); INIT_WORK(&resize->work, resize_hpt_prepare_work);
......
...@@ -168,8 +168,7 @@ union page_table_entry { ...@@ -168,8 +168,7 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */ unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */ unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */ unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */ unsigned long : 9;
unsigned long : 8;
}; };
}; };
...@@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, ...@@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_PAGE_TRANSLATION; return PGM_PAGE_TRANSLATION;
if (pte.z) if (pte.z)
return PGM_TRANSLATION_SPEC; return PGM_TRANSLATION_SPEC;
if (pte.co && !edat1)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p; dat_protection |= pte.p;
raddr.pfra = pte.pfra; raddr.pfra = pte.pfra;
real_address: real_address:
...@@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, ...@@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
if (!rc && pte.i) if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION; rc = PGM_PAGE_TRANSLATION;
if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) if (!rc && pte.z)
rc = PGM_TRANSLATION_SPEC; rc = PGM_TRANSLATION_SPEC;
shadow_page: shadow_page:
pte.p |= dat_protection; pte.p |= dat_protection;
......
...@@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) ...@@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
case EXIT_REASON_PREEMPTION_TIMER: case EXIT_REASON_PREEMPTION_TIMER:
return false; return false;
case EXIT_REASON_PML_FULL:
/* We don't expose PML support to L1. */
return false;
default: default:
return true; return true;
} }
...@@ -10267,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ...@@ -10267,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
} }
if (enable_pml) {
/*
* Conceptually we want to copy the PML address and index from
* vmcs01 here, and then back to vmcs01 on nested vmexit. But,
* since we always flush the log on each vmexit, this happens
* to be equivalent to simply resetting the fields in vmcs02.
*/
ASSERT(vmx->pml_pg);
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
if (nested_cpu_has_ept(vmcs12)) { if (nested_cpu_has_ept(vmcs12)) {
kvm_mmu_unload(vcpu); kvm_mmu_unload(vcpu);
nested_ept_init_mmu_context(vcpu); nested_ept_init_mmu_context(vcpu);
......
...@@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); ...@@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void); int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level); bool level);
......
...@@ -96,6 +96,9 @@ ...@@ -96,6 +96,9 @@
#define GICH_MISR_EOI (1 << 0) #define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1) #define GICH_MISR_U (1 << 1)
#define GICV_PMR_PRIORITY_SHIFT 3
#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/irqdomain.h> #include <linux/irqdomain.h>
......
...@@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) ...@@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
* For a specific CPU, initialize the GIC VE hardware.
*/
void kvm_vgic_init_cpu_hardware(void)
{
BUG_ON(preemptible());
/*
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_init_lrs();
else
kvm_call_hyp(__vgic_v3_init_lrs);
}
/** /**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either * according to the host GIC model. Accordingly calls either
......
...@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, ...@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
val = vmcr.ctlr; val = vmcr.ctlr;
break; break;
case GIC_CPU_PRIMASK: case GIC_CPU_PRIMASK:
val = vmcr.pmr; /*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
GICV_PMR_PRIORITY_SHIFT;
break; break;
case GIC_CPU_BINPOINT: case GIC_CPU_BINPOINT:
val = vmcr.bpr; val = vmcr.bpr;
...@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, ...@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vmcr.ctlr = val; vmcr.ctlr = val;
break; break;
case GIC_CPU_PRIMASK: case GIC_CPU_PRIMASK:
vmcr.pmr = val; /*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
GICV_PMR_PRIORITY_MASK;
break; break;
case GIC_CPU_BINPOINT: case GIC_CPU_BINPOINT:
vmcr.bpr = val; vmcr.bpr = val;
......
...@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val) ...@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
return (unsigned long *)val; return (unsigned long *)val;
} }
static inline void vgic_v2_write_lr(int lr, u32 val)
{
void __iomem *base = kvm_vgic_global_state.vctrl_base;
writel_relaxed(val, base + GICH_LR0 + (lr * 4));
}
void vgic_v2_init_lrs(void)
{
int i;
for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
vgic_v2_write_lr(i, 0);
}
void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
{ {
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
...@@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) ...@@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_MASK; GICH_VMCR_ALIAS_BINPOINT_MASK;
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
GICH_VMCR_BINPOINT_MASK; GICH_VMCR_BINPOINT_MASK;
vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
GICH_VMCR_PRIMASK_MASK; GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
} }
...@@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) ...@@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_SHIFT; GICH_VMCR_ALIAS_BINPOINT_SHIFT;
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
GICH_VMCR_BINPOINT_SHIFT; GICH_VMCR_BINPOINT_SHIFT;
vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
GICH_VMCR_PRIMASK_SHIFT; GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
} }
void vgic_v2_enable(struct kvm_vcpu *vcpu) void vgic_v2_enable(struct kvm_vcpu *vcpu)
......
...@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq) ...@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level; return irq->pending_latch || irq->line_level;
} }
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
* state to userspace can generate either GICv2 or GICv3 CPU interface
* registers regardless of the hardware backed GIC used.
*/
struct vgic_vmcr { struct vgic_vmcr {
u32 ctlr; u32 ctlr;
u32 abpr; u32 abpr;
u32 bpr; u32 bpr;
u32 pmr; u32 pmr; /* Priority mask field in the GICC_PMR and
* ICC_PMR_EL1 priority field format */
/* Below member variable are valid only for GICv3 */ /* Below member variable are valid only for GICv3 */
u32 grpen0; u32 grpen0;
u32 grpen1; u32 grpen1;
...@@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm); ...@@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type); enum vgic_type);
void vgic_v2_init_lrs(void);
static inline void vgic_get_irq_kref(struct vgic_irq *irq) static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{ {
if (irq->intid < VGIC_MIN_LPI) if (irq->intid < VGIC_MIN_LPI)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment