Commit 8dbc4110 authored by Oliver Upton

Merge branch kvm-arm64/lpi-xarray into kvmarm/next

* kvm-arm64/lpi-xarray:
  : xarray-based representation of vgic LPIs
  :
  : KVM's linked-list of LPI state has proven to be a bottleneck in LPI
  : injection paths, due to lock serialization when acquiring / releasing a
  : reference on an IRQ.
  :
  : Start the tedious process of reworking KVM's LPI injection by replacing
  : the LPI linked-list with an xarray, leveraging this to allow RCU readers
  : to walk it outside of the spinlock.
  KVM: arm64: vgic: Don't acquire the lpi_list_lock in vgic_put_irq()
  KVM: arm64: vgic: Ensure the irq refcount is nonzero when taking a ref
  KVM: arm64: vgic: Rely on RCU protection in vgic_get_lpi()
  KVM: arm64: vgic: Free LPI vgic_irq structs in an RCU-safe manner
  KVM: arm64: vgic: Use atomics to count LPIs
  KVM: arm64: vgic: Get rid of the LPI linked-list
  KVM: arm64: vgic-its: Walk the LPI xarray in vgic_copy_lpi_list()
  KVM: arm64: vgic-v3: Iterate the xarray to find pending LPIs
  KVM: arm64: vgic: Use xarray to find LPI in vgic_get_lpi()
  KVM: arm64: vgic: Store LPIs in an xarray
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parents 0d874858 e27f2d56
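
The hunks below convert KVM's LPI bookkeeping from a linked list protected by a spinlock to an xarray that RCU readers can walk locklessly. As a reading aid only, here is a minimal, self-contained sketch of the lookup/teardown pattern the series converges on, built from the stock xarray, kref and RCU APIs. The demo_irq, demo_xa, demo_count, demo_get() and demo_put() names are hypothetical stand-ins, not part of the patch: lookups index the xarray under rcu_read_lock() and only take a reference with kref_get_unless_zero(), so a dying entry is never resurrected; the final put erases the entry under the IRQ-safe xa_lock and defers the free with kfree_rcu() so lockless readers stay safe.

#include <linux/atomic.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/xarray.h>

struct demo_irq {			/* hypothetical stand-in for struct vgic_irq */
	struct rcu_head rcu;
	struct kref refcount;
	u32 intid;
};

static DEFINE_XARRAY_FLAGS(demo_xa, XA_FLAGS_LOCK_IRQ);
static atomic_t demo_count = ATOMIC_INIT(0);

/* Freeing is done by the last put via kfree_rcu(), mirroring vgic_irq_release(). */
static void demo_irq_release(struct kref *ref)
{
}

static struct demo_irq *demo_get(u32 intid)
{
	struct demo_irq *irq;

	rcu_read_lock();
	irq = xa_load(&demo_xa, intid);
	/* Only succeed if the refcount hasn't already dropped to zero. */
	if (irq && !kref_get_unless_zero(&irq->refcount))
		irq = NULL;
	rcu_read_unlock();

	return irq;
}

static void demo_put(struct demo_irq *irq)
{
	unsigned long flags;

	if (!kref_put(&irq->refcount, demo_irq_release))
		return;

	/* Last reference: unpublish the entry, then defer the free past RCU readers. */
	xa_lock_irqsave(&demo_xa, flags);
	__xa_erase(&demo_xa, irq->intid);
	xa_unlock_irqrestore(&demo_xa, flags);

	atomic_dec(&demo_count);
	kfree_rcu(irq, rcu);
}
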
@@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 	seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
 	seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
 	if (v3)
-		seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
+		seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
 	seq_printf(s, "enabled:\t%d\n", dist->enabled);
 	seq_printf(s, "\n");
...
@@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 
-	INIT_LIST_HEAD(&dist->lpi_list_head);
 	INIT_LIST_HEAD(&dist->lpi_translation_cache);
 	raw_spin_lock_init(&dist->lpi_list_lock);
+	xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
 }
 
 /* CREATION */
@@ -366,6 +366,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
 
 	if (vgic_supports_direct_msis(kvm))
 		vgic_v4_teardown(kvm);
+
+	xa_destroy(&dist->lpi_xa);
 }
 
 static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
...
@@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 	if (!irq)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&irq->lpi_list);
+	ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+	if (ret) {
+		kfree(irq);
+		return ERR_PTR(ret);
+	}
+
 	INIT_LIST_HEAD(&irq->ap_list);
 	raw_spin_lock_init(&irq->irq_lock);
@@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 	 * There could be a race with another vgic_add_lpi(), so we need to
 	 * check that we don't add a second list entry with the same LPI.
 	 */
-	list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
-		if (oldirq->intid != intid)
-			continue;
-
+	oldirq = xa_load(&dist->lpi_xa, intid);
+	if (vgic_try_get_irq_kref(oldirq)) {
 		/* Someone was faster with adding this LPI, lets use that. */
 		kfree(irq);
 		irq = oldirq;
 
-		/*
-		 * This increases the refcount, the caller is expected to
-		 * call vgic_put_irq() on the returned pointer once it's
-		 * finished with the IRQ.
-		 */
-		vgic_get_irq_kref(irq);
-
 		goto out_unlock;
 	}
 
-	list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
-	dist->lpi_list_count++;
+	ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+	if (ret) {
+		xa_release(&dist->lpi_xa, intid);
+		kfree(irq);
+		goto out_unlock;
+	}
+
+	atomic_inc(&dist->lpi_count);
 
 out_unlock:
 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
+	if (ret)
+		return ERR_PTR(ret);
+
 	/*
 	 * We "cache" the configuration table entries in our struct vgic_irq's.
 	 * However we only have those structs for mapped IRQs, so we read in
@@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 	return 0;
 }
 
+#define GIC_LPI_MAX_INTID	((1 << INTERRUPT_ID_BITS_ITS) - 1)
+
 /*
  * Create a snapshot of the current LPIs targeting @vcpu, so that we can
  * enumerate those LPIs without holding any lock.
@@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
+	XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
 	struct vgic_irq *irq;
 	unsigned long flags;
 	u32 *intids;
@@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 	 * command). If coming from another path (such as enabling LPIs),
 	 * we must be careful not to overrun the array.
 	 */
-	irq_count = READ_ONCE(dist->lpi_list_count);
+	irq_count = atomic_read(&dist->lpi_count);
 	intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
 	if (!intids)
 		return -ENOMEM;
 
 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+	rcu_read_lock();
+
+	xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
 		if (i == irq_count)
 			break;
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
@@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 			continue;
 		intids[i++] = irq->intid;
 	}
+
+	rcu_read_unlock();
 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
 	*intid_ptr = intids;
@@ -592,8 +604,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
 
 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 	irq = __vgic_its_check_cache(dist, db, devid, eventid);
-	if (irq)
-		vgic_get_irq_kref(irq);
+	if (!vgic_try_get_irq_kref(irq))
+		irq = NULL;
 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
@@ -637,8 +649,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
 	 * was in the cache, and increment it on the new interrupt.
 	 */
 	if (cte->irq)
-		__vgic_put_lpi_locked(kvm, cte->irq);
+		vgic_put_irq(kvm, cte->irq);
 
+	/*
+	 * The irq refcount is guaranteed to be nonzero while holding the
+	 * its_lock, as the ITE (and the reference it holds) cannot be freed.
+	 */
+	lockdep_assert_held(&its->its_lock);
 	vgic_get_irq_kref(irq);
 
 	cte->db = db;
@@ -669,7 +686,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm)
 		if (!cte->irq)
 			break;
 
-		__vgic_put_lpi_locked(kvm, cte->irq);
+		vgic_put_irq(kvm, cte->irq);
 		cte->irq = NULL;
 	}
...
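
Continuing the illustrative demo sketch from above (same hypothetical demo_xa, demo_irq and demo_count), the snippet below shows the xas_for_each() snapshot walk that vgic_copy_lpi_list() switches to in the hunk above. DEMO_FIRST_INTID, DEMO_MAX_INTID and demo_snapshot_intids() are made-up names; the patch itself uses GIC_LPI_OFFSET and GIC_LPI_MAX_INTID. The xas_retry() check is added here for a pure RCU walk and is not present in the patch, which also holds a spinlock across the loop.

#define DEMO_FIRST_INTID	8192
#define DEMO_MAX_INTID		((1 << 16) - 1)

static int demo_snapshot_intids(u32 **intid_ptr)
{
	XA_STATE(xas, &demo_xa, DEMO_FIRST_INTID);
	struct demo_irq *irq;
	int count, i = 0;
	u32 *intids;

	/* The count is only a sizing hint; the walk re-checks the bound. */
	count = atomic_read(&demo_count);
	intids = kmalloc_array(count, sizeof(*intids), GFP_KERNEL_ACCOUNT);
	if (!intids)
		return -ENOMEM;

	rcu_read_lock();
	xas_for_each(&xas, irq, DEMO_MAX_INTID) {
		if (xas_retry(&xas, irq))
			continue;
		if (i == count)
			break;
		intids[i++] = irq->intid;
	}
	rcu_read_unlock();

	*intid_ptr = intids;
	return i;
}
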
@@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 	struct vgic_irq *irq;
 	gpa_t last_ptr = ~(gpa_t)0;
 	bool vlpi_avail = false;
+	unsigned long index;
 	int ret = 0;
 	u8 val;
@@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 		vlpi_avail = true;
 	}
 
-	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+	xa_for_each(&dist->lpi_xa, index, irq) {
 		int byte_offset, bit_nr;
 		struct kvm_vcpu *vcpu;
 		gpa_t pendbase, ptr;
...
@@ -30,6 +30,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  *   its->its_lock (mutex)
  *     vgic_cpu->ap_list_lock		must be taken with IRQs disabled
  *       kvm->lpi_list_lock		must be taken with IRQs disabled
+ *         vgic_dist->lpi_xa.xa_lock	must be taken with IRQs disabled
  *           vgic_irq->irq_lock		must be taken with IRQs disabled
  *
  * As the ap_list_lock might be taken from the timer interrupt handler,
@@ -54,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  */
 
 /*
- * Iterate over the VM's list of mapped LPIs to find the one with a
- * matching interrupt ID and return a reference to the IRQ structure.
+ * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
+ * structure. The caller is expected to call vgic_put_irq() later once it's
+ * finished with the IRQ.
  */
 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_irq *irq = NULL;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-
-	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
-		if (irq->intid != intid)
-			continue;
+	rcu_read_lock();
 
-		/*
-		 * This increases the refcount, the caller is expected to
-		 * call vgic_put_irq() later once it's finished with the IRQ.
-		 */
-		vgic_get_irq_kref(irq);
-		goto out_unlock;
-	}
-	irq = NULL;
+	irq = xa_load(&dist->lpi_xa, intid);
+	if (!vgic_try_get_irq_kref(irq))
+		irq = NULL;
 
-out_unlock:
-	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+	rcu_read_unlock();
 
 	return irq;
 }
@@ -120,22 +111,6 @@ static void vgic_irq_release(struct kref *ref)
 {
 }
 
-/*
- * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
- */
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
-{
-	struct vgic_dist *dist = &kvm->arch.vgic;
-
-	if (!kref_put(&irq->refcount, vgic_irq_release))
-		return;
-
-	list_del(&irq->lpi_list);
-	dist->lpi_list_count--;
-
-	kfree(irq);
-}
-
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -144,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 	if (irq->intid < VGIC_MIN_LPI)
 		return;
 
-	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-	__vgic_put_lpi_locked(kvm, irq);
-	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+	if (!kref_put(&irq->refcount, vgic_irq_release))
+		return;
+
+	xa_lock_irqsave(&dist->lpi_xa, flags);
+	__xa_erase(&dist->lpi_xa, irq->intid);
+	xa_unlock_irqrestore(&dist->lpi_xa, flags);
+
+	atomic_dec(&dist->lpi_count);
+	kfree_rcu(irq, rcu);
 }
 
 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@ -404,7 +385,8 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
 
 	/*
 	 * Grab a reference to the irq to reflect the fact that it is
-	 * now in the ap_list.
+	 * now in the ap_list. This is safe as the caller must already hold a
+	 * reference on the irq.
 	 */
 	vgic_get_irq_kref(irq);
 	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
...
@@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
 		     gpa_t addr, int len);
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 			      u32 intid);
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
 bool vgic_get_phys_line_level(struct vgic_irq *irq);
 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -220,12 +219,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
 
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
 {
+	if (!irq)
+		return false;
+
 	if (irq->intid < VGIC_MIN_LPI)
-		return;
+		return true;
 
-	kref_get(&irq->refcount);
+	return kref_get_unless_zero(&irq->refcount);
+}
+
+static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+{
+	WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
 }
 
 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
...
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 #include <kvm/iodev.h>
 #include <linux/list.h>
 #include <linux/jump_label.h>
@@ -116,7 +117,7 @@ struct irq_ops {
 
 struct vgic_irq {
 	raw_spinlock_t irq_lock;		/* Protects the content of the struct */
-	struct list_head lpi_list;		/* Used to link all LPIs together */
+	struct rcu_head rcu;
 	struct list_head ap_list;
 
 	struct kvm_vcpu *vcpu;			/* SGIs and PPIs: The VCPU
@@ -273,10 +274,10 @@ struct vgic_dist {
 	 */
 	u64			propbaser;
 
-	/* Protects the lpi_list and the count value below. */
+	/* Protects the lpi_list. */
 	raw_spinlock_t		lpi_list_lock;
-	struct list_head	lpi_list_head;
-	int			lpi_list_count;
+	struct xarray		lpi_xa;
+	atomic_t		lpi_count;
 
 	/* LPI translation cache */
 	struct list_head	lpi_translation_cache;
...
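
Continuing the same illustrative sketch (hypothetical demo_xa, demo_irq and demo_count from above, plus two extra includes), the function below mirrors roughly the insertion-side shape used by vgic_add_lpi() in the hunks above: the xarray slot is reserved up front so the later store cannot fail on allocation, a raw spinlock serializes concurrent adders (standing in for lpi_list_lock in the patch), and losing the race is handled by taking a reference on the already-published entry. demo_add() and demo_lock are made-up names, not part of the patch.

#include <linux/err.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);	/* serializes adders, like lpi_list_lock */

static struct demo_irq *demo_add(u32 intid)
{
	struct demo_irq *irq, *old;
	unsigned long flags;
	int ret;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL_ACCOUNT);
	if (!irq)
		return ERR_PTR(-ENOMEM);

	/* Preallocate the slot so the store under the lock cannot hit -ENOMEM. */
	ret = xa_reserve_irq(&demo_xa, intid, GFP_KERNEL_ACCOUNT);
	if (ret) {
		kfree(irq);
		return ERR_PTR(ret);
	}

	kref_init(&irq->refcount);
	irq->intid = intid;

	raw_spin_lock_irqsave(&demo_lock, flags);

	old = xa_load(&demo_xa, intid);
	if (old && kref_get_unless_zero(&old->refcount)) {
		/* Someone else won the race; use their entry instead. */
		kfree(irq);
		irq = old;
		goto out_unlock;
	}

	ret = xa_err(xa_store(&demo_xa, intid, irq, 0));
	if (ret) {
		xa_release(&demo_xa, intid);
		kfree(irq);
		goto out_unlock;
	}

	atomic_inc(&demo_count);

out_unlock:
	raw_spin_unlock_irqrestore(&demo_lock, flags);

	return ret ? ERR_PTR(ret) : irq;
}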