Commit 19ccb76a authored by Paul Mackerras, committed by Avi Kivity

KVM: PPC: Implement H_CEDE hcall for book3s_hv in real-mode code

With a KVM guest operating in SMT4 mode (i.e. 4 hardware threads per
core), whenever a CPU goes idle, we have to pull all the other
hardware threads in the core out of the guest, because the H_CEDE
hcall is handled in the kernel.  This is inefficient.

This adds code to book3s_hv_rmhandlers.S to handle the H_CEDE hcall
in real mode.  When a guest vcpu does an H_CEDE hcall, we now only
exit to the kernel if all the other vcpus in the same core are also
idle.  Otherwise we mark this vcpu as napping, save state that could
be lost in nap mode (mainly GPRs and FPRs), and execute the nap
instruction.  When the thread wakes up, because of a decrementer or
external interrupt, we come back in at kvm_start_guest (from the
system reset interrupt vector), find the `napping' flag set in the
paca, and go to the resume path.
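
To make that concrete, here is a hedged C-level sketch of the decision
the real-mode handler makes; the actual implementation is PowerPC
assembly in book3s_hv_rmhandlers.S, and the struct and function names
below are illustrative (only the napping_threads bitmap comes from the
patch):

#include <stdint.h>
#include <stdbool.h>

struct vcore {
	uint32_t napping_threads;	/* bitmap, one bit per hw thread */
	int num_threads;		/* 4 in SMT4 mode */
};

/*
 * Returns true if the ceding vcpu should exit to the kernel (all of
 * its siblings have ceded too); false means: save state that nap can
 * lose (GPRs, FPRs), set the paca `napping' flag, and nap.
 */
static bool cede_should_exit(struct vcore *vc, int thread_id)
{
	uint32_t me = 1u << thread_id;
	uint32_t all = (1u << vc->num_threads) - 1;

	vc->napping_threads |= me;	/* done atomically in the real code */
	return vc->napping_threads == all;
}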

This has some other ramifications.  First, when starting a core, we
now start all the threads, both those that are immediately runnable and
those that are idle.  This is so that we don't have to pull all the
threads out of the guest when an idle thread gets a decrementer interrupt
and wants to start running. In fact the idle threads all start as if
their H_CEDE hcall had just returned; being idle, they simply issue
another H_CEDE immediately and go back to nap mode.
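
A minimal sketch of this policy (assuming a hypothetical
start_thread() helper standing in for the code in kvmppc_run_core()
that kicks each hardware thread):

#include <stddef.h>

struct vcpu {
	int ceded;	/* 1 if this vcpu has done H_CEDE and is idle */
};

/* Hypothetical stand-in for starting one hardware thread. */
static void start_thread(struct vcpu *v) { (void)v; }

/*
 * Start every thread in the core, runnable and idle alike, so that an
 * idle thread which later takes a decrementer interrupt can begin
 * running without pulling its siblings out of the guest.
 */
static void run_core(struct vcpu *threads[], size_t n)
{
	for (size_t i = 0; i < n; i++)
		start_thread(threads[i]);	/* note: no check of ceded */
}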

This required some changes to kvmppc_run_core() and kvmppc_run_vcpu().
These functions have been restructured to make them simpler and clearer.
We introduce a level of indirection in the wait queue that gets woken
when external and decrementer interrupts are generated for a vcpu, so
that all 4 vcpus in a vcore can share a single wait queue.  This is
needed because all 4 vcpus are handled by one host kernel thread.
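
A sketch of that indirection, using stub types (the wqp and vcore wq
fields are from the diff below; everything else is simplified):

typedef struct { int unused; } wq_head_t;

struct vcore {
	wq_head_t wq;		/* one wait queue for the whole vcore */
};

struct vcpu {
	wq_head_t wq;		/* per-vcpu queue, set up by generic code */
	wq_head_t *wqp;		/* the queue wake-ups actually use */
	struct vcore *vcore;
};

/* book3s_hv points wqp at the vcore's shared queue; other backends
 * leave it aimed at the per-vcpu queue (see kvm_arch_vcpu_create()
 * in the diff below). */
static void attach_vcpu(struct vcpu *v, struct vcore *vc)
{
	v->vcore = vc;
	v->wqp = &vc->wq;
}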

Secondly, when we need to exit from the guest to the kernel, we now
have to generate an IPI for any napping threads, because an HDEC
interrupt doesn't wake up a napping thread.
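
A hedged sketch of the wake-up (the patch's assembly raises the IPI by
writing IPI_PRIORITY to the napping thread's XICS MFRR register;
xics_ipi() below is a hypothetical helper, and the IPI_PRIORITY value
is assumed from asm/xics.h):

#include <stdint.h>

#define IPI_PRIORITY	4	/* assumed value, from asm/xics.h */

struct vcore {
	uint32_t napping_threads;	/* bitmap of napping hw threads */
	int num_threads;
	int pcpu;			/* first physical cpu of the core */
};

/* Hypothetical helper: raise an IPI on one cpu via the XICS. */
static void xics_ipi(int cpu) { (void)cpu; }

static void wake_napping_threads(struct vcore *vc)
{
	for (int t = 0; t < vc->num_threads; t++)
		if (vc->napping_threads & (1u << t))
			xics_ipi(vc->pcpu + t);	/* HDEC won't wake these */
}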

Thirdly, we now need to be able to handle virtual external interrupts
and decrementer interrupts becoming pending while a thread is napping,
and deliver those interrupts to the guest when the thread wakes.
This is done in kvmppc_cede_reentry, just before fast_guest_return.
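
Sketched in C (the real kvmppc_cede_reentry is assembly; the helper
and the pending-state fields here are illustrative, while 0x500 and
0x900 are the architected external and decrementer interrupt vectors):

struct vcpu {
	int pending_external;	/* virtual external irq became pending */
	int dec_expired;	/* decrementer fired while napping */
};

#define BOOK3S_INTERRUPT_EXTERNAL	0x500
#define BOOK3S_INTERRUPT_DECREMENTER	0x900

/* Hypothetical helper: set the guest up to take the given vector. */
static void deliver_interrupt(struct vcpu *v, int vec)
{
	(void)v; (void)vec;
}

/* On wake-up, inject whatever became pending during the nap, then
 * fall through to fast_guest_return to re-enter the guest. */
static void cede_reentry(struct vcpu *v)
{
	if (v->pending_external)
		deliver_interrupt(v, BOOK3S_INTERRUPT_EXTERNAL);
	else if (v->dec_expired)
		deliver_interrupt(v, BOOK3S_INTERRUPT_DECREMENTER);
}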

Finally, since we are not using the generic kvm_vcpu_block for book3s_hv,
and hence not calling kvm_arch_vcpu_runnable, we can remove the #ifdef
from kvm_arch_vcpu_runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
parent 02143947
@@ -76,6 +76,7 @@ struct kvmppc_host_state {
 	ulong scratch1;
 	u8 in_guest;
 	u8 restore_hid5;
+	u8 napping;

 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu *kvm_vcpu;
...
@@ -198,21 +198,29 @@ struct kvm_arch {
  */
 struct kvmppc_vcore {
 	int n_runnable;
-	int n_blocked;
+	int n_busy;
 	int num_threads;
 	int entry_exit_count;
 	int n_woken;
 	int nap_count;
+	int napping_threads;
 	u16 pcpu;
-	u8 vcore_running;
+	u8 vcore_state;
 	u8 in_guest;
 	struct list_head runnable_threads;
 	spinlock_t lock;
+	wait_queue_head_t wq;
 };

 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
 #define VCORE_EXIT_COUNT(vc)	((vc)->entry_exit_count >> 8)

+/* Values for vcore_state */
+#define VCORE_INACTIVE	0
+#define VCORE_RUNNING	1
+#define VCORE_EXITING	2
+#define VCORE_SLEEPING	3
+
 struct kvmppc_pte {
 	ulong eaddr;
 	u64 vpage;
@@ -403,11 +411,13 @@ struct kvm_vcpu_arch {
 	struct dtl *dtl;
 	struct dtl *dtl_end;

+	wait_queue_head_t *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
 	int state;
 	int ptid;
+	bool timer_running;
 	wait_queue_head_t cpu_run;

 	struct kvm_vcpu_arch_shared *shared;
@@ -423,8 +433,9 @@ struct kvm_vcpu_arch {
 #endif
 };

-#define KVMPPC_VCPU_BUSY_IN_HOST	0
-#define KVMPPC_VCPU_BLOCKED	1
+/* Values for vcpu->arch.state */
+#define KVMPPC_VCPU_STOPPED	0
+#define KVMPPC_VCPU_BUSY_IN_HOST	1
 #define KVMPPC_VCPU_RUNNABLE	2

 #endif /* __POWERPC_KVM_HOST_H__ */
...
@@ -44,6 +44,7 @@
 #include <asm/compat.h>
 #include <asm/mmu.h>
 #include <asm/hvcall.h>
+#include <asm/xics.h>
 #endif
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
@@ -460,6 +461,8 @@ int main(void)
 	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
 	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
 	DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
+	DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
 	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
 	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
@@ -475,6 +478,7 @@ int main(void)
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
+	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
@@ -532,6 +536,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
 	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
 	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+	HSTATE_FIELD(HSTATE_NAPPING, napping);

 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -544,6 +549,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
 #endif /* CONFIG_KVM_BOOK3S_64_HV */

 #else /* CONFIG_PPC_BOOK3S */
...
(Two further file diffs are collapsed in this view and not shown.)
@@ -39,12 +39,8 @@
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 	return !(v->arch.shared->msr & MSR_WE) ||
 	       !!(v->arch.pending_exceptions);
-#else
-	return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
-#endif
 }

 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -285,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
 	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	vcpu->arch.wqp = &vcpu->wq;
 	if (!IS_ERR(vcpu))
 		kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
@@ -316,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data)
 	kvmppc_core_queue_dec(vcpu);

-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	}
 }
@@ -570,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	if (irq->irq == KVM_INTERRUPT_UNSET)
+	if (irq->irq == KVM_INTERRUPT_UNSET) {
 		kvmppc_core_dequeue_external(vcpu, irq);
-	else
-		kvmppc_core_queue_external(vcpu, irq);
+		return 0;
+	}
+
+	kvmppc_core_queue_external(vcpu, irq);

-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	} else if (vcpu->cpu != -1) {
 		smp_send_reschedule(vcpu->cpu);
...