Commit 8dbc4110 authored by Oliver Upton

Merge branch kvm-arm64/lpi-xarray into kvmarm/next

* kvm-arm64/lpi-xarray:
  : xarray-based representation of vgic LPIs
  :
  : KVM's linked-list of LPI state has proven to be a bottleneck in LPI
  : injection paths, due to lock serialization when acquiring / releasing a
  : reference on an IRQ.
  :
  : Start the tedious process of reworking KVM's LPI injection by replacing
  : the LPI linked-list with an xarray, leveraging this to allow RCU readers
  : to walk it outside of the spinlock.
  KVM: arm64: vgic: Don't acquire the lpi_list_lock in vgic_put_irq()
  KVM: arm64: vgic: Ensure the irq refcount is nonzero when taking a ref
  KVM: arm64: vgic: Rely on RCU protection in vgic_get_lpi()
  KVM: arm64: vgic: Free LPI vgic_irq structs in an RCU-safe manner
  KVM: arm64: vgic: Use atomics to count LPIs
  KVM: arm64: vgic: Get rid of the LPI linked-list
  KVM: arm64: vgic-its: Walk the LPI xarray in vgic_copy_lpi_list()
  KVM: arm64: vgic-v3: Iterate the xarray to find pending LPIs
  KVM: arm64: vgic: Use xarray to find LPI in vgic_get_lpi()
  KVM: arm64: vgic: Store LPIs in an xarray
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parents 0d874858 e27f2d56
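
For context, here is a minimal sketch of the pattern this series converges on: LPIs are stored in an xarray, a lookup resolves an intid under rcu_read_lock() and only keeps the result if kref_get_unless_zero() succeeds, and the final kref_put() unpublishes the entry and defers the free with kfree_rcu(). The helper names lpi_lookup()/lpi_put() are illustrative only, not the upstream function names; the real code is in the vgic_get_lpi()/vgic_put_irq() hunks below, and the struct fields (lpi_xa, lpi_count, refcount, rcu) and vgic_irq_release() are the ones touched by this diff.

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

static struct vgic_irq *lpi_lookup(struct vgic_dist *dist, u32 intid)
{
	struct vgic_irq *irq;

	rcu_read_lock();
	irq = xa_load(&dist->lpi_xa, intid);		/* lockless index by intid */
	if (irq && !kref_get_unless_zero(&irq->refcount))
		irq = NULL;				/* raced with the final put */
	rcu_read_unlock();

	return irq;
}

static void lpi_put(struct vgic_dist *dist, struct vgic_irq *irq)
{
	unsigned long flags;

	if (!kref_put(&irq->refcount, vgic_irq_release))
		return;

	/* Last reference: unpublish the entry, then free it after a grace period. */
	xa_lock_irqsave(&dist->lpi_xa, flags);
	__xa_erase(&dist->lpi_xa, irq->intid);
	xa_unlock_irqrestore(&dist->lpi_xa, flags);

	atomic_dec(&dist->lpi_count);
	kfree_rcu(irq, rcu);		/* concurrent RCU readers may still hold the pointer */
}
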
@@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
if (v3)
seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
......
@@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
INIT_LIST_HEAD(&dist->lpi_list_head);
INIT_LIST_HEAD(&dist->lpi_translation_cache);
raw_spin_lock_init(&dist->lpi_list_lock);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
@@ -366,6 +366,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
xa_destroy(&dist->lpi_xa);
}
static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
......
@@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&irq->lpi_list);
ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
if (ret) {
kfree(irq);
return ERR_PTR(ret);
}
INIT_LIST_HEAD(&irq->ap_list);
raw_spin_lock_init(&irq->irq_lock);
@@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
* There could be a race with another vgic_add_lpi(), so we need to
* check that we don't add a second list entry with the same LPI.
*/
list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
if (oldirq->intid != intid)
continue;
oldirq = xa_load(&dist->lpi_xa, intid);
if (vgic_try_get_irq_kref(oldirq)) {
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
/*
* This increases the refcount, the caller is expected to
* call vgic_put_irq() on the returned pointer once it's
* finished with the IRQ.
*/
vgic_get_irq_kref(irq);
goto out_unlock;
}
ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
goto out_unlock;
}
list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
dist->lpi_list_count++;
atomic_inc(&dist->lpi_count);
out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
if (ret)
return ERR_PTR(ret);
/*
* We "cache" the configuration table entries in our struct vgic_irq's.
* However we only have those structs for mapped IRQs, so we read in
@@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
return 0;
}
#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
/*
* Create a snapshot of the current LPIs targeting @vcpu, so that we can
* enumerate those LPIs without holding any lock.
@@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
{
struct vgic_dist *dist = &kvm->arch.vgic;
XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
struct vgic_irq *irq;
unsigned long flags;
u32 *intids;
@@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
* command). If coming from another path (such as enabling LPIs),
* we must be careful not to overrun the array.
*/
irq_count = READ_ONCE(dist->lpi_list_count);
irq_count = atomic_read(&dist->lpi_count);
intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
if (!intids)
return -ENOMEM;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
rcu_read_lock();
xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
if (i == irq_count)
break;
/* We don't need to "get" the IRQ, as we hold the list lock. */
@@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
continue;
intids[i++] = irq->intid;
}
rcu_read_unlock();
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
*intid_ptr = intids;
@@ -592,8 +604,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
irq = __vgic_its_check_cache(dist, db, devid, eventid);
if (irq)
vgic_get_irq_kref(irq);
if (!vgic_try_get_irq_kref(irq))
irq = NULL;
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
@@ -637,8 +649,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
* was in the cache, and increment it on the new interrupt.
*/
if (cte->irq)
__vgic_put_lpi_locked(kvm, cte->irq);
vgic_put_irq(kvm, cte->irq);
/*
* The irq refcount is guaranteed to be nonzero while holding the
* its_lock, as the ITE (and the reference it holds) cannot be freed.
*/
lockdep_assert_held(&its->its_lock);
vgic_get_irq_kref(irq);
cte->db = db;
@@ -669,7 +686,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm)
if (!cte->irq)
break;
__vgic_put_lpi_locked(kvm, cte->irq);
vgic_put_irq(kvm, cte->irq);
cte->irq = NULL;
}
......
@@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
struct vgic_irq *irq;
gpa_t last_ptr = ~(gpa_t)0;
bool vlpi_avail = false;
unsigned long index;
int ret = 0;
u8 val;
@@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
vlpi_avail = true;
}
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
xa_for_each(&dist->lpi_xa, index, irq) {
int byte_offset, bit_nr;
struct kvm_vcpu *vcpu;
gpa_t pendbase, ptr;
......
@@ -30,7 +30,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
* kvm->lpi_list_lock must be taken with IRQs disabled
* vgic_irq->irq_lock must be taken with IRQs disabled
* vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
* vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -54,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
*/
/*
* Iterate over the VM's list of mapped LPIs to find the one with a
* matching interrupt ID and return a reference to the IRQ structure.
* Index the VM's xarray of mapped LPIs and return a reference to the IRQ
* structure. The caller is expected to call vgic_put_irq() later once it's
* finished with the IRQ.
*/
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = NULL;
unsigned long flags;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
if (irq->intid != intid)
continue;
rcu_read_lock();
/*
* This increases the refcount, the caller is expected to
* call vgic_put_irq() later once it's finished with the IRQ.
*/
vgic_get_irq_kref(irq);
goto out_unlock;
}
irq = NULL;
irq = xa_load(&dist->lpi_xa, intid);
if (!vgic_try_get_irq_kref(irq))
irq = NULL;
out_unlock:
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
rcu_read_unlock();
return irq;
}
@@ -120,22 +111,6 @@ static void vgic_irq_release(struct kref *ref)
{
}
/*
* Drop the refcount on the LPI. Must be called with lpi_list_lock held.
*/
void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
if (!kref_put(&irq->refcount, vgic_irq_release))
return;
list_del(&irq->lpi_list);
dist->lpi_list_count--;
kfree(irq);
}
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -144,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
if (irq->intid < VGIC_MIN_LPI)
return;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
__vgic_put_lpi_locked(kvm, irq);
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
if (!kref_put(&irq->refcount, vgic_irq_release))
return;
xa_lock_irqsave(&dist->lpi_xa, flags);
__xa_erase(&dist->lpi_xa, irq->intid);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
atomic_dec(&dist->lpi_count);
kfree_rcu(irq, rcu);
}
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@ -404,7 +385,8 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
/*
* Grab a reference to the irq to reflect the fact that it is
* now in the ap_list.
* now in the ap_list. This is safe as the caller must already hold a
* reference on the irq.
*/
vgic_get_irq_kref(irq);
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
......
@@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
gpa_t addr, int len);
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
u32 intid);
void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
bool vgic_get_phys_line_level(struct vgic_irq *irq);
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -220,12 +219,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
{
if (!irq)
return false;
if (irq->intid < VGIC_MIN_LPI)
return;
return true;
kref_get(&irq->refcount);
return kref_get_unless_zero(&irq->refcount);
}
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
......
@@ -13,6 +13,7 @@
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
#include <linux/xarray.h>
#include <kvm/iodev.h>
#include <linux/list.h>
#include <linux/jump_label.h>
@@ -116,7 +117,7 @@ struct irq_ops {
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
struct list_head lpi_list; /* Used to link all LPIs together */
struct rcu_head rcu;
struct list_head ap_list;
struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU
@@ -273,10 +274,10 @@ struct vgic_dist {
*/
u64 propbaser;
/* Protects the lpi_list and the count value below. */
/* Protects the lpi_list. */
raw_spinlock_t lpi_list_lock;
struct list_head lpi_list_head;
int lpi_list_count;
struct xarray lpi_xa;
atomic_t lpi_count;
/* LPI translation cache */
struct list_head lpi_translation_cache;
......
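
A similarly hedged sketch of the reader side the merge description refers to, where an RCU walk of the xarray replaces taking lpi_list_lock around the list traversal. The helper name snapshot_lpi_intids() is illustrative, not upstream code; the real equivalents are the vgic_copy_lpi_list() and vgic_v3_save_pending_tables() hunks above.

/* Illustrative only: collect up to @max mapped LPI intids without the spinlock. */
static int snapshot_lpi_intids(struct vgic_dist *dist, u32 *intids, int max)
{
	struct vgic_irq *irq;
	unsigned long intid;
	int i = 0;

	rcu_read_lock();
	xa_for_each(&dist->lpi_xa, intid, irq) {	/* RCU-safe xarray iteration */
		if (i == max)
			break;
		intids[i++] = irq->intid;
	}
	rcu_read_unlock();

	return i;
}
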