Commit 1c8b86a3 authored by Linus Torvalds

Merge tag 'xsa441-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fix from Juergen Gross:
 "A fix for the xen events driver:

  Closing of an event channel in the Linux kernel can result in a
  deadlock. This happens when the close is being performed in parallel
  to an unrelated Xen console action and the handling of a Xen console
  interrupt in an unprivileged guest.

  The closing of an event channel is e.g. triggered by removal of a
  paravirtual device on the other side. As this action will cause
  console messages to be issued on the other side quite often, the
  chance of triggering the deadlock is not negligible"

* tag 'xsa441-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/events: replace evtchn_rwlock with RCU
parents 01bbafc6 87797fad
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/irqnr.h> #include <linux/irqnr.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/cpuhotplug.h> #include <linux/cpuhotplug.h>
#include <linux/atomic.h> #include <linux/atomic.h>
...@@ -96,6 +97,7 @@ enum xen_irq_type { ...@@ -96,6 +97,7 @@ enum xen_irq_type {
struct irq_info { struct irq_info {
struct list_head list; struct list_head list;
struct list_head eoi_list; struct list_head eoi_list;
struct rcu_work rwork;
short refcnt; short refcnt;
u8 spurious_cnt; u8 spurious_cnt;
u8 is_accounted; u8 is_accounted;
...@@ -146,20 +148,10 @@ const struct evtchn_ops *evtchn_ops; ...@@ -146,20 +148,10 @@ const struct evtchn_ops *evtchn_ops;
*/ */
static DEFINE_MUTEX(irq_mapping_update_lock); static DEFINE_MUTEX(irq_mapping_update_lock);
/*
* Lock protecting event handling loop against removing event channels.
* Adding of event channels is no issue as the associated IRQ becomes active
* only after everything is setup (before request_[threaded_]irq() the handler
* can't be entered for an event, as the event channel will be unmasked only
* then).
*/
static DEFINE_RWLOCK(evtchn_rwlock);
/* /*
* Lock hierarchy: * Lock hierarchy:
* *
* irq_mapping_update_lock * irq_mapping_update_lock
* evtchn_rwlock
* IRQ-desc lock * IRQ-desc lock
* percpu eoi_list_lock * percpu eoi_list_lock
* irq_info->lock * irq_info->lock
...@@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info) ...@@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info)
info->is_accounted = 1; info->is_accounted = 1;
} }
/*
 * Deferred release of an irq_info, run as RCU work.
 *
 * @work: work item embedded (via the "rwork" rcu_work member) in the
 *        struct irq_info that is to be released.
 *
 * Registered with INIT_RCU_WORK() in xen_irq_init() and queued by
 * xen_free_irq() through queue_rcu_work(), so that the info pointer is
 * dropped and the memory freed only after the event handling paths that
 * look it up under rcu_read_lock() can no longer be using it.
 */
static void delayed_free_irq(struct work_struct *work)
{
/* Recover the irq_info that contains this work item. */
struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
rwork);
/* Copy the IRQ number now: it is still needed after info is freed below. */
unsigned int irq = info->irq;
/* Remove the info pointer only now, with no potential users left. */
set_info_for_irq(irq, NULL);
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
if (irq >= nr_legacy_irqs())
irq_free_desc(irq);
}
/* Constructors for packed IRQ information. */ /* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info, static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq, unsigned irq,
...@@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) ...@@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work)
eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
read_lock_irqsave(&evtchn_rwlock, flags); rcu_read_lock();
while (true) { while (true) {
spin_lock(&eoi->eoi_list_lock); spin_lock_irqsave(&eoi->eoi_list_lock, flags);
info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
eoi_list); eoi_list);
if (info == NULL || now < info->eoi_time) { if (info == NULL)
spin_unlock(&eoi->eoi_list_lock); break;
if (now < info->eoi_time) {
mod_delayed_work_on(info->eoi_cpu, system_wq,
&eoi->delayed,
info->eoi_time - now);
break; break;
} }
list_del_init(&info->eoi_list); list_del_init(&info->eoi_list);
spin_unlock(&eoi->eoi_list_lock); spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
info->eoi_time = 0; info->eoi_time = 0;
xen_irq_lateeoi_locked(info, false); xen_irq_lateeoi_locked(info, false);
} }
if (info) spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
mod_delayed_work_on(info->eoi_cpu, system_wq,
&eoi->delayed, info->eoi_time - now);
read_unlock_irqrestore(&evtchn_rwlock, flags); rcu_read_unlock();
} }
static void xen_cpu_init_eoi(unsigned int cpu) static void xen_cpu_init_eoi(unsigned int cpu)
...@@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) ...@@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu)
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{ {
struct irq_info *info; struct irq_info *info;
unsigned long flags;
read_lock_irqsave(&evtchn_rwlock, flags); rcu_read_lock();
info = info_for_irq(irq); info = info_for_irq(irq);
if (info) if (info)
xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
read_unlock_irqrestore(&evtchn_rwlock, flags); rcu_read_unlock();
} }
EXPORT_SYMBOL_GPL(xen_irq_lateeoi); EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
...@@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq) ...@@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq)
info->type = IRQT_UNBOUND; info->type = IRQT_UNBOUND;
info->refcnt = -1; info->refcnt = -1;
INIT_RCU_WORK(&info->rwork, delayed_free_irq);
set_info_for_irq(irq, info); set_info_for_irq(irq, info);
/* /*
...@@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) ...@@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq) static void xen_free_irq(unsigned irq)
{ {
struct irq_info *info = info_for_irq(irq); struct irq_info *info = info_for_irq(irq);
unsigned long flags;
if (WARN_ON(!info)) if (WARN_ON(!info))
return; return;
write_lock_irqsave(&evtchn_rwlock, flags);
if (!list_empty(&info->eoi_list)) if (!list_empty(&info->eoi_list))
lateeoi_list_del(info); lateeoi_list_del(info);
list_del(&info->list); list_del(&info->list);
set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0); WARN_ON(info->refcnt > 0);
write_unlock_irqrestore(&evtchn_rwlock, flags); queue_rcu_work(system_wq, &info->rwork);
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
if (irq < nr_legacy_irqs())
return;
irq_free_desc(irq);
} }
/* Not called for lateeoi events. */ /* Not called for lateeoi events. */
...@@ -1711,7 +1709,14 @@ int xen_evtchn_do_upcall(void) ...@@ -1711,7 +1709,14 @@ int xen_evtchn_do_upcall(void)
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct evtchn_loop_ctrl ctrl = { 0 }; struct evtchn_loop_ctrl ctrl = { 0 };
read_lock(&evtchn_rwlock); /*
* When closing an event channel the associated IRQ must not be freed
* until all cpus have left the event handling loop. This is ensured
* by taking the rcu_read_lock() while handling events, as freeing of
* the IRQ is handled via queue_rcu_work() _after_ closing the event
* channel.
*/
rcu_read_lock();
do { do {
vcpu_info->evtchn_upcall_pending = 0; vcpu_info->evtchn_upcall_pending = 0;
...@@ -1724,7 +1729,7 @@ int xen_evtchn_do_upcall(void) ...@@ -1724,7 +1729,7 @@ int xen_evtchn_do_upcall(void)
} while (vcpu_info->evtchn_upcall_pending); } while (vcpu_info->evtchn_upcall_pending);
read_unlock(&evtchn_rwlock); rcu_read_unlock();
/* /*
* Increment irq_epoch only now to defer EOIs only for * Increment irq_epoch only now to defer EOIs only for
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment