Commit 2e25aa5f authored by Paul Mackerras's avatar Paul Mackerras Committed by Avi Kivity

KVM: PPC: Book3S HV: Make virtual processor area registration more robust

The PAPR API allows three sorts of per-virtual-processor areas to be
registered (VPA, SLB shadow buffer, and dispatch trace log), and
furthermore, these can be registered and unregistered for another
virtual CPU.  Currently we just update the vcpu fields pointing to
these areas at the time of registration or unregistration.  If this
is done on another vcpu, there is the possibility that the target vcpu
is using those fields at the time and could end up using a bogus
pointer and corrupting memory.

This fixes the race by making the target cpu itself do the update, so
we can be sure that the update happens at a time when the fields
aren't being used.  Each area now has a struct kvmppc_vpa which is
used to manage these updates.  There is also a spinlock which protects
access to all of the kvmppc_vpa structs, other than to the pinned_addr
fields.  (We could have just taken the spinlock when using the vpa,
slb_shadow or dtl fields, but that would mean taking the spinlock on
every guest entry and exit.)

This also changes 'struct dtl' (which was undefined) to 'struct dtl_entry',
which is what the rest of the kernel uses.

Thanks to Michael Ellerman <michael@ellerman.id.au> for pointing out
the need to initialize vcpu->arch.vpa_update_lock.
Signed-off-by: default avatarPaul Mackerras <paulus@samba.org>
Signed-off-by: default avatarAlexander Graf <agraf@suse.de>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent f0888f70
...@@ -114,6 +114,16 @@ ...@@ -114,6 +114,16 @@
#define H_PP1 (1UL<<(63-62)) #define H_PP1 (1UL<<(63-62))
#define H_PP2 (1UL<<(63-63)) #define H_PP2 (1UL<<(63-63))
/* Flags for H_REGISTER_VPA subfunction field */
#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */
#define H_VPA_FUNC_MASK 7UL
#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */
#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */
#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */
#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */
#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */
#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */
/* VASI States */ /* VASI States */
#define H_VASI_INVALID 0 #define H_VASI_INVALID 0
#define H_VASI_ENABLED 1 #define H_VASI_ENABLED 1
......
...@@ -82,7 +82,7 @@ struct kvm_vcpu; ...@@ -82,7 +82,7 @@ struct kvm_vcpu;
struct lppaca; struct lppaca;
struct slb_shadow; struct slb_shadow;
struct dtl; struct dtl_entry;
struct kvm_vm_stat { struct kvm_vm_stat {
u32 remote_tlb_flush; u32 remote_tlb_flush;
...@@ -279,6 +279,19 @@ struct kvmppc_vcore { ...@@ -279,6 +279,19 @@ struct kvmppc_vcore {
#define VCORE_EXITING 2 #define VCORE_EXITING 2
#define VCORE_SLEEPING 3 #define VCORE_SLEEPING 3
/*
* Struct used to manage memory for a virtual processor area
* registered by a PAPR guest. There are three types of area
* that a guest can register.
*/
struct kvmppc_vpa {
void *pinned_addr; /* Address in kernel linear mapping */
void *pinned_end; /* End of region */
unsigned long next_gpa; /* Guest phys addr for update */
unsigned long len; /* Number of bytes required */
u8 update_pending; /* 1 => update pinned_addr from next_gpa */
};
struct kvmppc_pte { struct kvmppc_pte {
ulong eaddr; ulong eaddr;
u64 vpage; u64 vpage;
...@@ -473,11 +486,6 @@ struct kvm_vcpu_arch { ...@@ -473,11 +486,6 @@ struct kvm_vcpu_arch {
u8 prodded; u8 prodded;
u32 last_inst; u32 last_inst;
struct lppaca *vpa;
struct slb_shadow *slb_shadow;
struct dtl *dtl;
struct dtl *dtl_end;
wait_queue_head_t *wqp; wait_queue_head_t *wqp;
struct kvmppc_vcore *vcore; struct kvmppc_vcore *vcore;
int ret; int ret;
...@@ -502,6 +510,13 @@ struct kvm_vcpu_arch { ...@@ -502,6 +510,13 @@ struct kvm_vcpu_arch {
struct task_struct *run_task; struct task_struct *run_task;
struct kvm_run *kvm_run; struct kvm_run *kvm_run;
pgd_t *pgdir; pgd_t *pgdir;
spinlock_t vpa_update_lock;
struct kvmppc_vpa vpa;
struct kvmppc_vpa dtl;
struct dtl_entry *dtl_ptr;
unsigned long dtl_index;
struct kvmppc_vpa slb_shadow;
#endif #endif
}; };
......
...@@ -466,7 +466,7 @@ int main(void) ...@@ -466,7 +466,7 @@ int main(void)
DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
......
...@@ -134,6 +134,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) ...@@ -134,6 +134,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
vpa->yield_count = 1; vpa->yield_count = 1;
} }
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
u32 dummy;
union {
u16 hword;
u32 word;
} length;
};
static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
if (vpap->update_pending)
return vpap->next_gpa != 0;
return vpap->pinned_addr != NULL;
}
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long flags, unsigned long flags,
unsigned long vcpuid, unsigned long vpa) unsigned long vcpuid, unsigned long vpa)
...@@ -142,88 +158,153 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, ...@@ -142,88 +158,153 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
unsigned long len, nb; unsigned long len, nb;
void *va; void *va;
struct kvm_vcpu *tvcpu; struct kvm_vcpu *tvcpu;
int err = H_PARAMETER; int err;
int subfunc;
struct kvmppc_vpa *vpap;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid); tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu) if (!tvcpu)
return H_PARAMETER; return H_PARAMETER;
flags >>= 63 - 18; subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
flags &= 7; if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
if (flags == 0 || flags == 4) subfunc == H_VPA_REG_SLB) {
return H_PARAMETER; /* Registering new area - address must be cache-line aligned */
if (flags < 4) { if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
if (vpa & 0x7f)
return H_PARAMETER; return H_PARAMETER;
if (flags >= 2 && !tvcpu->arch.vpa)
return H_RESOURCE; /* convert logical addr to kernel addr and read length */
/* registering new area; convert logical addr to real */
va = kvmppc_pin_guest_page(kvm, vpa, &nb); va = kvmppc_pin_guest_page(kvm, vpa, &nb);
if (va == NULL) if (va == NULL)
return H_PARAMETER; return H_PARAMETER;
if (flags <= 1) if (subfunc == H_VPA_REG_VPA)
len = *(unsigned short *)(va + 4); len = ((struct reg_vpa *)va)->length.hword;
else else
len = *(unsigned int *)(va + 4); len = ((struct reg_vpa *)va)->length.word;
if (len > nb) kvmppc_unpin_guest_page(kvm, va);
goto out_unpin;
switch (flags) { /* Check length */
case 1: /* register VPA */ if (len > nb || len < sizeof(struct reg_vpa))
if (len < 640) return H_PARAMETER;
goto out_unpin; } else {
if (tvcpu->arch.vpa) vpa = 0;
kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); len = 0;
tvcpu->arch.vpa = va; }
init_vpa(vcpu, va);
break; err = H_PARAMETER;
case 2: /* register DTL */ vpap = NULL;
if (len < 48) spin_lock(&tvcpu->arch.vpa_update_lock);
goto out_unpin;
len -= len % 48; switch (subfunc) {
if (tvcpu->arch.dtl) case H_VPA_REG_VPA: /* register VPA */
kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); if (len < sizeof(struct lppaca))
tvcpu->arch.dtl = va;
tvcpu->arch.dtl_end = va + len;
break; break;
case 3: /* register SLB shadow buffer */ vpap = &tvcpu->arch.vpa;
if (len < 16) err = 0;
goto out_unpin; break;
if (tvcpu->arch.slb_shadow)
kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); case H_VPA_REG_DTL: /* register DTL */
tvcpu->arch.slb_shadow = va; if (len < sizeof(struct dtl_entry))
break; break;
} len -= len % sizeof(struct dtl_entry);
} else {
switch (flags) { /* Check that they have previously registered a VPA */
case 5: /* unregister VPA */ err = H_RESOURCE;
if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) if (!vpa_is_registered(&tvcpu->arch.vpa))
return H_RESOURCE;
if (!tvcpu->arch.vpa)
break;
kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
tvcpu->arch.vpa = NULL;
break; break;
case 6: /* unregister DTL */
if (!tvcpu->arch.dtl) vpap = &tvcpu->arch.dtl;
break; err = 0;
kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); break;
tvcpu->arch.dtl = NULL;
case H_VPA_REG_SLB: /* register SLB shadow buffer */
/* Check that they have previously registered a VPA */
err = H_RESOURCE;
if (!vpa_is_registered(&tvcpu->arch.vpa))
break; break;
case 7: /* unregister SLB shadow buffer */
if (!tvcpu->arch.slb_shadow) vpap = &tvcpu->arch.slb_shadow;
break; err = 0;
kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); break;
tvcpu->arch.slb_shadow = NULL;
case H_VPA_DEREG_VPA: /* deregister VPA */
/* Check they don't still have a DTL or SLB buf registered */
err = H_RESOURCE;
if (vpa_is_registered(&tvcpu->arch.dtl) ||
vpa_is_registered(&tvcpu->arch.slb_shadow))
break; break;
}
vpap = &tvcpu->arch.vpa;
err = 0;
break;
case H_VPA_DEREG_DTL: /* deregister DTL */
vpap = &tvcpu->arch.dtl;
err = 0;
break;
case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
vpap = &tvcpu->arch.slb_shadow;
err = 0;
break;
} }
return H_SUCCESS;
out_unpin: if (vpap) {
kvmppc_unpin_guest_page(kvm, va); vpap->next_gpa = vpa;
vpap->len = len;
vpap->update_pending = 1;
}
spin_unlock(&tvcpu->arch.vpa_update_lock);
return err; return err;
} }
static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
{
void *va;
unsigned long nb;
vpap->update_pending = 0;
va = NULL;
if (vpap->next_gpa) {
va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
if (nb < vpap->len) {
/*
* If it's now too short, it must be that userspace
* has changed the mappings underlying guest memory,
* so unregister the region.
*/
kvmppc_unpin_guest_page(kvm, va);
va = NULL;
}
}
if (vpap->pinned_addr)
kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
vpap->pinned_addr = va;
if (va)
vpap->pinned_end = va + vpap->len;
}
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
}
if (vcpu->arch.dtl.update_pending) {
kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
vcpu->arch.dtl_index = 0;
}
if (vcpu->arch.slb_shadow.update_pending)
kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
spin_unlock(&vcpu->arch.vpa_update_lock);
}
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{ {
unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long req = kvmppc_get_gpr(vcpu, 3);
...@@ -468,6 +549,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ...@@ -468,6 +549,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
/* default to host PVR, since we can't spoof it */ /* default to host PVR, since we can't spoof it */
vcpu->arch.pvr = mfspr(SPRN_PVR); vcpu->arch.pvr = mfspr(SPRN_PVR);
kvmppc_set_pvr(vcpu, vcpu->arch.pvr); kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
spin_lock_init(&vcpu->arch.vpa_update_lock);
kvmppc_mmu_book3s_hv_init(vcpu); kvmppc_mmu_book3s_hv_init(vcpu);
...@@ -512,12 +594,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ...@@ -512,12 +594,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{ {
if (vcpu->arch.dtl) spin_lock(&vcpu->arch.vpa_update_lock);
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); if (vcpu->arch.dtl.pinned_addr)
if (vcpu->arch.slb_shadow) kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); if (vcpu->arch.slb_shadow.pinned_addr)
if (vcpu->arch.vpa) kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); if (vcpu->arch.vpa.pinned_addr)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu); kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu);
} }
...@@ -722,8 +806,13 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -722,8 +806,13 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
vc->in_guest = 0; vc->in_guest = 0;
vc->pcpu = smp_processor_id(); vc->pcpu = smp_processor_id();
vc->napping_threads = 0; vc->napping_threads = 0;
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu); kvmppc_start_thread(vcpu);
if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
kvmppc_update_vpas(vcpu);
}
/* Grab any remaining hw threads so they can't go into the kernel */ /* Grab any remaining hw threads so they can't go into the kernel */
for (i = ptid; i < threads_per_core; ++i) for (i = ptid; i < threads_per_core; ++i)
kvmppc_grab_hwthread(vc->pcpu + i); kvmppc_grab_hwthread(vc->pcpu + i);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment