Commit 14243b38 authored by David Woodhouse's avatar David Woodhouse Committed by Paolo Bonzini

KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery

This adds basic support for delivering 2 level event channels to a guest.

Initially, it only supports delivery via the IRQ routing table, triggered
by an eventfd. In order to do so, it has a kvm_xen_set_evtchn_fast()
function which will use the pre-mapped shared_info page if it already
exists and is still valid, while the slow path through the irqfd_inject
workqueue will remap the shared_info page if necessary.

It sets the bits in the shared_info page but not the vcpu_info; that is
deferred to __kvm_xen_has_interrupt() which raises the vector to the
appropriate vCPU.

Add a 'verbose' mode to xen_shinfo_test while adding test cases for this.
Signed-off-by: default avatarDavid Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211210163625.2886-5-dwmw2@infradead.org>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 1cfc9c4b
...@@ -1799,6 +1799,7 @@ No flags are specified so far, the corresponding field must be set to zero. ...@@ -1799,6 +1799,7 @@ No flags are specified so far, the corresponding field must be set to zero.
struct kvm_irq_routing_msi msi; struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint; struct kvm_irq_routing_hv_sint hv_sint;
struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8]; __u32 pad[8];
} u; } u;
}; };
...@@ -1808,6 +1809,7 @@ No flags are specified so far, the corresponding field must be set to zero. ...@@ -1808,6 +1809,7 @@ No flags are specified so far, the corresponding field must be set to zero.
#define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4 #define KVM_IRQ_ROUTING_HV_SINT 4
#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
flags: flags:
...@@ -1859,6 +1861,20 @@ address_hi must be zero. ...@@ -1859,6 +1861,20 @@ address_hi must be zero.
__u32 sint; __u32 sint;
}; };
struct kvm_irq_routing_xen_evtchn {
__u32 port;
__u32 vcpu;
__u32 priority;
};
When KVM_CAP_XEN_HVM includes the KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL bit
in its indication of supported features, routing to Xen event channels
is supported. Although the priority field is present, only the value
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL is supported, which means delivery by
2 level event channels. FIFO event channel support may be added in
the future.
4.55 KVM_SET_TSC_KHZ 4.55 KVM_SET_TSC_KHZ
-------------------- --------------------
...@@ -7413,6 +7429,7 @@ PVHVM guests. Valid flags are:: ...@@ -7413,6 +7429,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 3)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page. ioctl is available, for the guest to set its hypercall page.
...@@ -7432,6 +7449,10 @@ The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related ...@@ -7432,6 +7449,10 @@ The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls. supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
The KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL flag indicates that IRQ routing entries
of the type KVM_IRQ_ROUTING_XEN_EVTCHN are supported, with the priority
field set to indicate 2 level event channel delivery.
8.31 KVM_CAP_PPC_MULTITCE 8.31 KVM_CAP_PPC_MULTITCE
------------------------- -------------------------
......
...@@ -610,6 +610,7 @@ struct kvm_vcpu_xen { ...@@ -610,6 +610,7 @@ struct kvm_vcpu_xen {
u64 last_steal; u64 last_steal;
u64 runstate_entry_time; u64 runstate_entry_time;
u64 runstate_times[4]; u64 runstate_times[4];
unsigned long evtchn_pending_sel;
}; };
struct kvm_vcpu_arch { struct kvm_vcpu_arch {
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "hyperv.h" #include "hyperv.h"
#include "x86.h" #include "x86.h"
#include "xen.h"
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, struct kvm *kvm, int irq_source_id, int level,
...@@ -175,6 +176,13 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, ...@@ -175,6 +176,13 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
return r; return r;
break; break;
#ifdef CONFIG_KVM_XEN
case KVM_IRQ_ROUTING_XEN_EVTCHN:
if (!level)
return -1;
return kvm_xen_set_evtchn_fast(e, kvm);
#endif
default: default:
break; break;
} }
...@@ -310,6 +318,10 @@ int kvm_set_routing_entry(struct kvm *kvm, ...@@ -310,6 +318,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.vcpu = ue->u.hv_sint.vcpu; e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
e->hv_sint.sint = ue->u.hv_sint.sint; e->hv_sint.sint = ue->u.hv_sint.sint;
break; break;
#ifdef CONFIG_KVM_XEN
case KVM_IRQ_ROUTING_XEN_EVTCHN:
return kvm_xen_setup_evtchn(kvm, e, ue);
#endif
default: default:
return -EINVAL; return -EINVAL;
} }
......
...@@ -4188,7 +4188,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -4188,7 +4188,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_XEN_HVM: case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO; KVM_XEN_HVM_CONFIG_SHARED_INFO |
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL;
if (sched_info_on()) if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE; r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
break; break;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <trace/events/kvm.h> #include <trace/events/kvm.h>
#include <xen/interface/xen.h> #include <xen/interface/xen.h>
#include <xen/interface/vcpu.h> #include <xen/interface/vcpu.h>
#include <xen/interface/event_channel.h>
#include "trace.h" #include "trace.h"
...@@ -195,6 +196,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) ...@@ -195,6 +196,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
int __kvm_xen_has_interrupt(struct kvm_vcpu *v) int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{ {
unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
bool atomic = in_atomic() || !task_is_running(current);
int err; int err;
u8 rc = 0; u8 rc = 0;
...@@ -204,6 +207,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) ...@@ -204,6 +207,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
*/ */
struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache; struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
struct kvm_memslots *slots = kvm_memslots(v->kvm); struct kvm_memslots *slots = kvm_memslots(v->kvm);
bool ghc_valid = slots->generation == ghc->generation &&
!kvm_is_error_hva(ghc->hva) && ghc->memslot;
unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending); unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
/* No need for compat handling here */ /* No need for compat handling here */
...@@ -219,8 +225,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) ...@@ -219,8 +225,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
* cache in kvm_read_guest_offset_cached(), but just uses * cache in kvm_read_guest_offset_cached(), but just uses
* __get_user() instead. And falls back to the slow path. * __get_user() instead. And falls back to the slow path.
*/ */
if (likely(slots->generation == ghc->generation && if (!evtchn_pending_sel && ghc_valid) {
!kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
/* Fast path */ /* Fast path */
pagefault_disable(); pagefault_disable();
err = __get_user(rc, (u8 __user *)ghc->hva + offset); err = __get_user(rc, (u8 __user *)ghc->hva + offset);
...@@ -239,11 +244,82 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) ...@@ -239,11 +244,82 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
* and we'll end up getting called again from a context where we *can* * and we'll end up getting called again from a context where we *can*
* fault in the page and wait for it. * fault in the page and wait for it.
*/ */
if (in_atomic() || !task_is_running(current)) if (atomic)
return 1; return 1;
kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset, if (!ghc_valid) {
sizeof(rc)); err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
if (err || !ghc->memslot) {
/*
* If this failed, userspace has screwed up the
* vcpu_info mapping. No interrupts for you.
*/
return 0;
}
}
/*
* Now we have a valid (protected by srcu) userspace HVA in
* ghc->hva which points to the struct vcpu_info. If there
* are any bits in the in-kernel evtchn_pending_sel then
* we need to write those to the guest vcpu_info and set
* its evtchn_upcall_pending flag. If there aren't any bits
* to add, we only want to *check* evtchn_upcall_pending.
*/
if (evtchn_pending_sel) {
bool long_mode = v->kvm->arch.xen.long_mode;
if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
return 0;
if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
struct vcpu_info __user *vi = (void __user *)ghc->hva;
/* Attempt to set the evtchn_pending_sel bits in the
* guest, and if that succeeds then clear the same
* bits in the in-kernel version. */
asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
"\tnotq %0\n"
"\t" LOCK_PREFIX "andq %0, %2\n"
"2:\n"
"\t.section .fixup,\"ax\"\n"
"3:\tjmp\t2b\n"
"\t.previous\n"
_ASM_EXTABLE_UA(1b, 3b)
: "=r" (evtchn_pending_sel),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
: "0" (evtchn_pending_sel));
} else {
struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
u32 evtchn_pending_sel32 = evtchn_pending_sel;
/* Attempt to set the evtchn_pending_sel bits in the
* guest, and if that succeeds then clear the same
* bits in the in-kernel version. */
asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
"\tnotl %0\n"
"\t" LOCK_PREFIX "andl %0, %2\n"
"2:\n"
"\t.section .fixup,\"ax\"\n"
"3:\tjmp\t2b\n"
"\t.previous\n"
_ASM_EXTABLE_UA(1b, 3b)
: "=r" (evtchn_pending_sel32),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
: "0" (evtchn_pending_sel32));
}
rc = 1;
unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
err:
user_access_end();
mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
} else {
__get_user(rc, (u8 __user *)ghc->hva + offset);
}
return rc; return rc;
} }
...@@ -740,3 +816,179 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) ...@@ -740,3 +816,179 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
return 0; return 0;
} }
static inline int max_evtchn_port(struct kvm *kvm)
{
if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
return EVTCHN_2L_NR_CHANNELS;
else
return COMPAT_EVTCHN_2L_NR_CHANNELS;
}
/*
* This follows the kvm_set_irq() API, so it returns:
* < 0 Interrupt was ignored (masked or not delivered for other reasons)
* = 0 Interrupt was coalesced (previous irq is still pending)
* > 0 Number of CPUs interrupt was delivered to
*/
int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm)
{
struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
struct kvm_vcpu *vcpu;
unsigned long *pending_bits, *mask_bits;
unsigned long flags;
int port_word_bit;
bool kick_vcpu = false;
int idx;
int rc;
vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu);
if (!vcpu)
return -1;
if (!vcpu->arch.xen.vcpu_info_set)
return -1;
if (e->xen_evtchn.port >= max_evtchn_port(kvm))
return -1;
rc = -EWOULDBLOCK;
read_lock_irqsave(&gpc->lock, flags);
idx = srcu_read_lock(&kvm->srcu);
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
goto out_rcu;
if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
struct shared_info *shinfo = gpc->khva;
pending_bits = (unsigned long *)&shinfo->evtchn_pending;
mask_bits = (unsigned long *)&shinfo->evtchn_mask;
port_word_bit = e->xen_evtchn.port / 64;
} else {
struct compat_shared_info *shinfo = gpc->khva;
pending_bits = (unsigned long *)&shinfo->evtchn_pending;
mask_bits = (unsigned long *)&shinfo->evtchn_mask;
port_word_bit = e->xen_evtchn.port / 32;
}
/*
* If this port wasn't already set, and if it isn't masked, then
* we try to set the corresponding bit in the in-kernel shadow of
* evtchn_pending_sel for the target vCPU. And if *that* wasn't
* already set, then we kick the vCPU in question to write to the
* *real* evtchn_pending_sel in its own guest vcpu_info struct.
*/
if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) {
rc = 0; /* It was already raised */
} else if (test_bit(e->xen_evtchn.port, mask_bits)) {
rc = -1; /* Masked */
} else {
rc = 1; /* Delivered. But was the vCPU waking already? */
if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
kick_vcpu = true;
}
out_rcu:
srcu_read_unlock(&kvm->srcu, idx);
read_unlock_irqrestore(&gpc->lock, flags);
if (kick_vcpu) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
return rc;
}
/* This is the version called from kvm_set_irq() as the .set function */
static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
int irq_source_id, int level, bool line_status)
{
bool mm_borrowed = false;
int rc;
if (!level)
return -1;
rc = kvm_xen_set_evtchn_fast(e, kvm);
if (rc != -EWOULDBLOCK)
return rc;
if (current->mm != kvm->mm) {
/*
* If not on a thread which already belongs to this KVM,
* we'd better be in the irqfd workqueue.
*/
if (WARN_ON_ONCE(current->mm))
return -EINVAL;
kthread_use_mm(kvm->mm);
mm_borrowed = true;
}
/*
* For the irqfd workqueue, using the main kvm->lock mutex is
* fine since this function is invoked from kvm_set_irq() with
* no other lock held, no srcu. In future if it will be called
* directly from a vCPU thread (e.g. on hypercall for an IPI)
* then it may need to switch to using a leaf-node mutex for
* serializing the shared_info mapping.
*/
mutex_lock(&kvm->lock);
/*
* It is theoretically possible for the page to be unmapped
* and the MMU notifier to invalidate the shared_info before
* we even get to use it. In that case, this looks like an
* infinite loop. It was tempting to do it via the userspace
* HVA instead... but that just *hides* the fact that it's
* an infinite loop, because if a fault occurs and it waits
* for the page to come back, it can *still* immediately
* fault and have to wait again, repeatedly.
*
* Conversely, the page could also have been reinstated by
* another thread before we even obtain the mutex above, so
* check again *first* before remapping it.
*/
do {
struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
int idx;
rc = kvm_xen_set_evtchn_fast(e, kvm);
if (rc != -EWOULDBLOCK)
break;
idx = srcu_read_lock(&kvm->srcu);
rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
PAGE_SIZE, false);
srcu_read_unlock(&kvm->srcu, idx);
} while(!rc);
mutex_unlock(&kvm->lock);
if (mm_borrowed)
kthread_unuse_mm(kvm->mm);
return rc;
}
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
return -EINVAL;
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
return -EINVAL;
e->xen_evtchn.port = ue->u.xen_evtchn.port;
e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu;
e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
e->set = evtchn_set_fn;
return 0;
}
...@@ -24,6 +24,12 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); ...@@ -24,6 +24,12 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
void kvm_xen_init_vm(struct kvm *kvm); void kvm_xen_init_vm(struct kvm *kvm);
void kvm_xen_destroy_vm(struct kvm *kvm); void kvm_xen_destroy_vm(struct kvm *kvm);
int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm);
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
static inline bool kvm_xen_msr_enabled(struct kvm *kvm) static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{ {
return static_branch_unlikely(&kvm_xen_enabled.key) && return static_branch_unlikely(&kvm_xen_enabled.key) &&
...@@ -134,6 +140,9 @@ struct compat_shared_info { ...@@ -134,6 +140,9 @@ struct compat_shared_info {
struct compat_arch_shared_info arch; struct compat_arch_shared_info arch;
}; };
#define COMPAT_EVTCHN_2L_NR_CHANNELS (8 * \
sizeof_field(struct compat_shared_info, \
evtchn_pending))
struct compat_vcpu_runstate_info { struct compat_vcpu_runstate_info {
int state; int state;
uint64_t state_entry_time; uint64_t state_entry_time;
......
...@@ -497,6 +497,12 @@ struct kvm_hv_sint { ...@@ -497,6 +497,12 @@ struct kvm_hv_sint {
u32 sint; u32 sint;
}; };
struct kvm_xen_evtchn {
u32 port;
u32 vcpu;
u32 priority;
};
struct kvm_kernel_irq_routing_entry { struct kvm_kernel_irq_routing_entry {
u32 gsi; u32 gsi;
u32 type; u32 type;
...@@ -517,6 +523,7 @@ struct kvm_kernel_irq_routing_entry { ...@@ -517,6 +523,7 @@ struct kvm_kernel_irq_routing_entry {
} msi; } msi;
struct kvm_s390_adapter_int adapter; struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint; struct kvm_hv_sint hv_sint;
struct kvm_xen_evtchn xen_evtchn;
}; };
struct hlist_node link; struct hlist_node link;
}; };
......
...@@ -1163,11 +1163,20 @@ struct kvm_irq_routing_hv_sint { ...@@ -1163,11 +1163,20 @@ struct kvm_irq_routing_hv_sint {
__u32 sint; __u32 sint;
}; };
struct kvm_irq_routing_xen_evtchn {
__u32 port;
__u32 vcpu;
__u32 priority;
};
#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1))
/* gsi routing entry types */ /* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_MSI 2
#define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4 #define KVM_IRQ_ROUTING_HV_SINT 4
#define KVM_IRQ_ROUTING_XEN_EVTCHN 5
struct kvm_irq_routing_entry { struct kvm_irq_routing_entry {
__u32 gsi; __u32 gsi;
...@@ -1179,6 +1188,7 @@ struct kvm_irq_routing_entry { ...@@ -1179,6 +1188,7 @@ struct kvm_irq_routing_entry {
struct kvm_irq_routing_msi msi; struct kvm_irq_routing_msi msi;
struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_s390_adapter adapter;
struct kvm_irq_routing_hv_sint hv_sint; struct kvm_irq_routing_hv_sint hv_sint;
struct kvm_irq_routing_xen_evtchn xen_evtchn;
__u32 pad[8]; __u32 pad[8];
} u; } u;
}; };
...@@ -1209,6 +1219,7 @@ struct kvm_x86_mce { ...@@ -1209,6 +1219,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
struct kvm_xen_hvm_config { struct kvm_xen_hvm_config {
__u32 flags; __u32 flags;
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
#include <stdint.h> #include <stdint.h>
#include <time.h> #include <time.h>
#include <sched.h> #include <sched.h>
#include <signal.h>
#include <sys/eventfd.h>
#define VCPU_ID 5 #define VCPU_ID 5
...@@ -22,10 +25,15 @@ ...@@ -22,10 +25,15 @@
#define SHINFO_REGION_SLOT 10 #define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096 #define PAGE_SIZE 4096
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11
#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
...@@ -73,15 +81,37 @@ struct vcpu_info { ...@@ -73,15 +81,37 @@ struct vcpu_info {
struct pvclock_vcpu_time_info time; struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */ }; /* 64 bytes (x86) */
struct shared_info {
struct vcpu_info vcpu_info[32];
unsigned long evtchn_pending[64];
unsigned long evtchn_mask[64];
struct pvclock_wall_clock wc;
uint32_t wc_sec_hi;
/* arch_shared_info here */
};
#define RUNSTATE_running 0 #define RUNSTATE_running 0
#define RUNSTATE_runnable 1 #define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2 #define RUNSTATE_blocked 2
#define RUNSTATE_offline 3 #define RUNSTATE_offline 3
static const char *runstate_names[] = {
"running",
"runnable",
"blocked",
"offline"
};
struct {
struct kvm_irq_routing info;
struct kvm_irq_routing_entry entries[2];
} irq_routes;
static void evtchn_handler(struct ex_regs *regs) static void evtchn_handler(struct ex_regs *regs)
{ {
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
vi->evtchn_upcall_pending = 0; vi->evtchn_upcall_pending = 0;
vi->evtchn_pending_sel = 0;
GUEST_SYNC(0x20); GUEST_SYNC(0x20);
} }
...@@ -127,7 +157,25 @@ static void guest_code(void) ...@@ -127,7 +157,25 @@ static void guest_code(void)
GUEST_SYNC(6); GUEST_SYNC(6);
GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
GUEST_DONE(); /* Attempt to deliver a *masked* interrupt */
GUEST_SYNC(7);
/* Wait until we see the bit set */
struct shared_info *si = (void *)SHINFO_VADDR;
while (!si->evtchn_pending[0])
__asm__ __volatile__ ("rep nop" : : : "memory");
/* Now deliver an *unmasked* interrupt */
GUEST_SYNC(8);
while (!si->evtchn_pending[1])
__asm__ __volatile__ ("rep nop" : : : "memory");
/* Change memslots and deliver an interrupt */
GUEST_SYNC(9);
for (;;)
__asm__ __volatile__ ("rep nop" : : : "memory");
} }
static int cmp_timespec(struct timespec *a, struct timespec *b) static int cmp_timespec(struct timespec *a, struct timespec *b)
...@@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b) ...@@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
return 0; return 0;
} }
static void handle_alrm(int sig)
{
TEST_FAIL("IRQ delivery timed out");
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
struct timespec min_ts, max_ts, vm_ts; struct timespec min_ts, max_ts, vm_ts;
bool verbose;
verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
!strncmp(argv[1], "--verbose", 10));
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
...@@ -155,6 +212,7 @@ int main(int argc, char *argv[]) ...@@ -155,6 +212,7 @@ int main(int argc, char *argv[])
} }
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
clock_gettime(CLOCK_REALTIME, &min_ts); clock_gettime(CLOCK_REALTIME, &min_ts);
...@@ -166,6 +224,11 @@ int main(int argc, char *argv[]) ...@@ -166,6 +224,11 @@ int main(int argc, char *argv[])
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
int zero_fd = open("/dev/zero", O_RDONLY);
TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
struct kvm_xen_hvm_config hvmc = { struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
.msr = XEN_HYPERCALL_MSR, .msr = XEN_HYPERCALL_MSR,
...@@ -184,6 +247,16 @@ int main(int argc, char *argv[]) ...@@ -184,6 +247,16 @@ int main(int argc, char *argv[])
}; };
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
/*
* Test what happens when the HVA of the shinfo page is remapped after
* the kernel has a reference to it. But make sure we copy the clock
* info over since that's only set at setup time, and we test it later.
*/
struct pvclock_wall_clock wc_copy = shinfo->wc;
void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
shinfo->wc = wc_copy;
struct kvm_xen_vcpu_attr vi = { struct kvm_xen_vcpu_attr vi = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
.u.gpa = VCPU_INFO_ADDR, .u.gpa = VCPU_INFO_ADDR,
...@@ -214,6 +287,49 @@ int main(int argc, char *argv[]) ...@@ -214,6 +287,49 @@ int main(int argc, char *argv[])
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
} }
int irq_fd[2] = { -1, -1 };
if (do_eventfd_tests) {
irq_fd[0] = eventfd(0, 0);
irq_fd[1] = eventfd(0, 0);
/* Unexpected, but not a KVM failure */
if (irq_fd[0] == -1 || irq_fd[1] == -1)
do_eventfd_tests = false;
}
if (do_eventfd_tests) {
irq_routes.info.nr = 2;
irq_routes.entries[0].gsi = 32;
irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
irq_routes.entries[0].u.xen_evtchn.port = 15;
irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
irq_routes.entries[1].gsi = 33;
irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
irq_routes.entries[1].u.xen_evtchn.port = 66;
irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
struct kvm_irqfd ifd = { };
ifd.fd = irq_fd[0];
ifd.gsi = 32;
vm_ioctl(vm, KVM_IRQFD, &ifd);
ifd.fd = irq_fd[1];
ifd.gsi = 33;
vm_ioctl(vm, KVM_IRQFD, &ifd);
struct sigaction sa = { };
sa.sa_handler = handle_alrm;
sigaction(SIGALRM, &sa, NULL);
}
struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
vinfo->evtchn_upcall_pending = 0; vinfo->evtchn_upcall_pending = 0;
...@@ -248,6 +364,8 @@ int main(int argc, char *argv[]) ...@@ -248,6 +364,8 @@ int main(int argc, char *argv[])
switch (uc.args[1]) { switch (uc.args[1]) {
case 0: case 0:
if (verbose)
printf("Delivering evtchn upcall\n");
evtchn_irq_expected = true; evtchn_irq_expected = true;
vinfo->evtchn_upcall_pending = 1; vinfo->evtchn_upcall_pending = 1;
break; break;
...@@ -256,11 +374,16 @@ int main(int argc, char *argv[]) ...@@ -256,11 +374,16 @@ int main(int argc, char *argv[])
TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
if (!do_runstate_tests) if (!do_runstate_tests)
goto done; goto done;
if (verbose)
printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1]; rst.u.runstate.state = uc.args[1];
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break; break;
case 4: case 4:
if (verbose)
printf("Testing RUNSTATE_ADJUST\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
memset(&rst.u, 0, sizeof(rst.u)); memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = (uint64_t)-1; rst.u.runstate.state = (uint64_t)-1;
...@@ -274,6 +397,8 @@ int main(int argc, char *argv[]) ...@@ -274,6 +397,8 @@ int main(int argc, char *argv[])
break; break;
case 5: case 5:
if (verbose)
printf("Testing RUNSTATE_DATA\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
memset(&rst.u, 0, sizeof(rst.u)); memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = RUNSTATE_running; rst.u.runstate.state = RUNSTATE_running;
...@@ -282,16 +407,54 @@ int main(int argc, char *argv[]) ...@@ -282,16 +407,54 @@ int main(int argc, char *argv[])
rst.u.runstate.time_offline = 0x5a; rst.u.runstate.time_offline = 0x5a;
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break; break;
case 6: case 6:
if (verbose)
printf("Testing steal time\n");
/* Yield until scheduler delay exceeds target */ /* Yield until scheduler delay exceeds target */
rundelay = get_run_delay() + MIN_STEAL_TIME; rundelay = get_run_delay() + MIN_STEAL_TIME;
do { do {
sched_yield(); sched_yield();
} while (get_run_delay() < rundelay); } while (get_run_delay() < rundelay);
break; break;
case 7:
if (!do_eventfd_tests)
goto done;
if (verbose)
printf("Testing masked event channel\n");
shinfo->evtchn_mask[0] = 0x8000;
eventfd_write(irq_fd[0], 1UL);
alarm(1);
break;
case 8:
if (verbose)
printf("Testing unmasked event channel\n");
/* Unmask that, but deliver the other one */
shinfo->evtchn_pending[0] = 0;
shinfo->evtchn_mask[0] = 0;
eventfd_write(irq_fd[1], 1UL);
evtchn_irq_expected = true;
alarm(1);
break;
case 9:
if (verbose)
printf("Testing event channel after memslot change\n");
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
eventfd_write(irq_fd[0], 1UL);
evtchn_irq_expected = true;
alarm(1);
break;
case 0x20: case 0x20:
TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
evtchn_irq_expected = false; evtchn_irq_expected = false;
if (shinfo->evtchn_pending[1] &&
shinfo->evtchn_pending[0])
goto done;
break; break;
} }
break; break;
...@@ -318,6 +481,16 @@ int main(int argc, char *argv[]) ...@@ -318,6 +481,16 @@ int main(int argc, char *argv[])
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
ti2 = addr_gpa2hva(vm, PVTIME_ADDR); ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
if (verbose) {
printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
ti->tsc_shift, ti->flags);
printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
ti2->tsc_shift, ti2->flags);
}
vm_ts.tv_sec = wc->sec; vm_ts.tv_sec = wc->sec;
vm_ts.tv_nsec = wc->nsec; vm_ts.tv_nsec = wc->nsec;
TEST_ASSERT(wc->version && !(wc->version & 1), TEST_ASSERT(wc->version && !(wc->version & 1),
...@@ -341,6 +514,15 @@ int main(int argc, char *argv[]) ...@@ -341,6 +514,15 @@ int main(int argc, char *argv[])
}; };
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
if (verbose) {
printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
rs->state, rs->state_entry_time);
for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
printf("State %s: %" PRIu64 " ns\n",
runstate_names[i], rs->time[i]);
}
}
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch"); "State entry time mismatch");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment