Commit 310bc395 authored by David Woodhouse, committed by Paolo Bonzini

KVM: x86/xen: Avoid deadlock by adding kvm->arch.xen.xen_lock leaf node lock

In commit 14243b38 ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN
and event channel delivery") the clever version of me left some helpful
notes for those who would come after him:

       /*
        * For the irqfd workqueue, using the main kvm->lock mutex is
        * fine since this function is invoked from kvm_set_irq() with
        * no other lock held, no srcu. In future if it will be called
        * directly from a vCPU thread (e.g. on hypercall for an IPI)
        * then it may need to switch to using a leaf-node mutex for
        * serializing the shared_info mapping.
        */
       mutex_lock(&kvm->lock);

In commit 2fd6df2f ("KVM: x86/xen: intercept EVTCHNOP_send from guests")
the other version of me ran straight past that comment without reading it,
and introduced a potential deadlock by taking vcpu->mutex and kvm->lock
in the wrong order.

Solve this as originally suggested, by adding a leaf-node lock in the Xen
state rather than using kvm->lock for it.
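For the record, the shape of the bug is the classic ABBA pattern. Below is a
minimal user-space sketch (illustrative only, not kernel code; the pthread
mutexes merely stand in for kvm->lock and vcpu->mutex, and thread_a/thread_b
are hypothetical). Thread A models an ioctl path following the documented
order, kvm->lock outside vcpu->mutex; thread B models the old EVTCHNOP_send
path, which ran on a vCPU thread already holding vcpu->mutex and then took
kvm->lock. Run them concurrently and each can block forever on the lock the
other holds:

    #include <pthread.h>

    static pthread_mutex_t kvm_lock   = PTHREAD_MUTEX_INITIALIZER; /* stands in for kvm->lock   */
    static pthread_mutex_t vcpu_mutex = PTHREAD_MUTEX_INITIALIZER; /* stands in for vcpu->mutex */

    static void *thread_a(void *arg)  /* documented order: kvm->lock, then vcpu->mutex */
    {
            (void)arg;
            pthread_mutex_lock(&kvm_lock);
            pthread_mutex_lock(&vcpu_mutex);  /* blocks if B already holds vcpu->mutex */
            pthread_mutex_unlock(&vcpu_mutex);
            pthread_mutex_unlock(&kvm_lock);
            return NULL;
    }

    static void *thread_b(void *arg)  /* old EVTCHNOP_send: vcpu->mutex, then kvm->lock */
    {
            (void)arg;
            pthread_mutex_lock(&vcpu_mutex);
            pthread_mutex_lock(&kvm_lock);    /* blocks if A already holds kvm->lock: ABBA */
            pthread_mutex_unlock(&kvm_lock);
            pthread_mutex_unlock(&vcpu_mutex);
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;
            pthread_create(&a, NULL, thread_a, NULL);
            pthread_create(&b, NULL, thread_b, NULL);
            pthread_join(a, NULL);  /* with unlucky timing, never returns */
            pthread_join(b, NULL);
            return 0;
    }

The leaf-node lock breaks the cycle: kvm->arch.xen.xen_lock nests inside
vcpu->mutex, and nothing is ever taken inside xen_lock, so it cannot
participate in an ordering cycle.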

Fixes: 2fd6df2f ("KVM: x86/xen: intercept EVTCHNOP_send from guests")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20230111180651.14394-4-dwmw2@infradead.org>
[Rebase, add docs. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 42a90008
@@ -39,7 +39,7 @@ For SRCU:
 On x86:
-- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
+- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock
 - kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock and
   kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
...
@@ -1111,6 +1111,7 @@ struct msr_bitmap_range {
 /* Xen emulation context */
 struct kvm_xen {
+	struct mutex xen_lock;
 	u32 xen_version;
 	bool long_mode;
 	bool runstate_update_flag;
...
@@ -607,26 +607,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
 			r = -EINVAL;
 		} else {
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->arch.xen.xen_lock);
 			kvm->arch.xen.long_mode = !!data->u.long_mode;
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->arch.xen.xen_lock);
 			r = 0;
 		}
 		break;
 	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->arch.xen.xen_lock);
 		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->arch.xen.xen_lock);
 		break;
 	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
 		if (data->u.vector && data->u.vector < 0x10)
 			r = -EINVAL;
 		else {
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->arch.xen.xen_lock);
 			kvm->arch.xen.upcall_vector = data->u.vector;
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->arch.xen.xen_lock);
 			r = 0;
 		}
 		break;
@@ -636,9 +636,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	case KVM_XEN_ATTR_TYPE_XEN_VERSION:
-		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->arch.xen.xen_lock);
 		kvm->arch.xen.xen_version = data->u.xen_version;
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->arch.xen.xen_lock);
 		r = 0;
 		break;
@@ -647,9 +647,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 			r = -EOPNOTSUPP;
 			break;
 		}
-		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->arch.xen.xen_lock);
 		kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->arch.xen.xen_lock);
 		r = 0;
 		break;
@@ -664,7 +664,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
 	int r = -ENOENT;
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	switch (data->type) {
 	case KVM_XEN_ATTR_TYPE_LONG_MODE:
@@ -703,7 +703,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	}
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	return r;
 }
@@ -711,7 +711,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 {
 	int idx, r = -ENOENT;
-	mutex_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	switch (data->type) {
@@ -939,7 +939,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 	}
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	mutex_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
 	return r;
 }
@@ -947,7 +947,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 {
 	int r = -ENOENT;
-	mutex_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
 	switch (data->type) {
 	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
@@ -1030,7 +1030,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		break;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
 	return r;
 }
@@ -1123,7 +1123,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 	    xhc->blob_size_32 || xhc->blob_size_64))
 		return -EINVAL;
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
 		static_branch_inc(&kvm_xen_enabled.key);
@@ -1132,7 +1132,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	return 0;
 }
@@ -1675,15 +1675,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		mm_borrowed = true;
 	}
-	/*
-	 * For the irqfd workqueue, using the main kvm->lock mutex is
-	 * fine since this function is invoked from kvm_set_irq() with
-	 * no other lock held, no srcu. In future if it will be called
-	 * directly from a vCPU thread (e.g. on hypercall for an IPI)
-	 * then it may need to switch to using a leaf-node mutex for
-	 * serializing the shared_info mapping.
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	/*
 	 * It is theoretically possible for the page to be unmapped
@@ -1712,7 +1704,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		srcu_read_unlock(&kvm->srcu, idx);
 	} while(!rc);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	if (mm_borrowed)
 		kthread_unuse_mm(kvm->mm);
@@ -1828,7 +1820,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
 	int ret;
 	/* Protect writes to evtchnfd as well as the idr lookup. */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
 	ret = -ENOENT;
@@ -1859,7 +1851,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
 	}
 	ret = 0;
 out_unlock:
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	return ret;
 }
@@ -1922,10 +1914,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
 		evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
 	}
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
 			GFP_KERNEL);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	if (ret >= 0)
 		return 0;
@@ -1943,9 +1935,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
 {
 	struct evtchnfd *evtchnfd;
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	if (!evtchnfd)
 		return -ENOENT;
@@ -1963,7 +1955,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
 	int i;
 	int n = 0;
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.xen.xen_lock);
 	/*
 	 * Because synchronize_srcu() cannot be called inside the
@@ -1975,7 +1967,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
 	all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
 	if (!all_evtchnfds) {
-		mutex_unlock(&kvm->lock);
+		mutex_unlock(&kvm->arch.xen.xen_lock);
 		return -ENOMEM;
 	}
@@ -1984,7 +1976,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
 		all_evtchnfds[n++] = evtchnfd;
 		idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
 	}
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.xen.xen_lock);
 	synchronize_srcu(&kvm->srcu);
@@ -2086,6 +2078,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 void kvm_xen_init_vm(struct kvm *kvm)
 {
+	mutex_init(&kvm->arch.xen.xen_lock);
 	idr_init(&kvm->arch.xen.evtchn_ports);
 	kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
 }