Commit 5a3f75e3 authored by Thomas Gleixner's avatar Thomas Gleixner

x86/irq: Plug irq vector hotplug race

Jin debugged a nasty cpu hotplug race which results in leaking a irq
vector on the newly hotplugged cpu.

cpu N				cpu M
native_cpu_up                   device_shutdown
  do_boot_cpu			  free_msi_irqs
  start_secondary                   arch_teardown_msi_irqs
    smp_callin                        default_teardown_msi_irqs
       setup_vector_irq                  arch_teardown_msi_irq
        __setup_vector_irq		   native_teardown_msi_irq
          lock(vector_lock)		     destroy_irq 
          install vectors
          unlock(vector_lock)
					       lock(vector_lock)
--->                                  	       __clear_irq_vector
                                    	       unlock(vector_lock)
    lock(vector_lock)
    set_cpu_online
    unlock(vector_lock)

This leaves the irq vector(s) which are torn down on CPU M stale in
the vector array of CPU N, because CPU M does not see CPU N online
yet. There is a similar issue with concurrent newly setup interrupts.

The alloc/free protection of irq descriptors does not prevent the
above race, because it merily prevents interrupt descriptors from
going away or changing concurrently.

Prevent this by moving the call to setup_vector_irq() into the
vector_lock held region which protects set_cpu_online():

cpu N				cpu M
native_cpu_up                   device_shutdown
  do_boot_cpu			  free_msi_irqs
  start_secondary                   arch_teardown_msi_irqs
    smp_callin                        default_teardown_msi_irqs
       lock(vector_lock)                arch_teardown_msi_irq
       setup_vector_irq()
        __setup_vector_irq		   native_teardown_msi_irq
          install vectors		     destroy_irq 
       set_cpu_online
       unlock(vector_lock)
					       lock(vector_lock)
                                  	       __clear_irq_vector
                                    	       unlock(vector_lock)

So cpu M either sees the cpu N online before clearing the vector or
cpu N installs the vectors after cpu M has cleared it.
Reported-by: default avatarxiao jin <jin.xiao@intel.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Link: http://lkml.kernel.org/r/20150705171102.141898931@linutronix.de
parent 827a82ff
......@@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu)
int irq, vector;
struct apic_chip_data *data;
/*
* vector_lock will make sure that we don't run into irq vector
* assignments that might be happening on another cpu in parallel,
* while we setup our initial vector to irq mappings.
*/
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
data = apic_chip_data(irq_get_irq_data(irq));
......@@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu)
if (!cpumask_test_cpu(cpu, data->domain))
per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
}
raw_spin_unlock(&vector_lock);
}
/*
* Setup the vector to irq mappings.
* Setup the vector to irq mappings. Must be called with vector_lock held.
*/
void setup_vector_irq(int cpu)
{
int irq;
lockdep_assert_held(&vector_lock);
/*
* On most of the platforms, legacy PIC delivers the interrupts on the
* boot cpu. But there are certain platforms where PIC interrupts are
......
......@@ -170,11 +170,6 @@ static void smp_callin(void)
*/
apic_ap_setup();
/*
* Need to setup vector mappings before we enable interrupts.
*/
setup_vector_irq(smp_processor_id());
/*
* Save our processor parameters. Note: this information
* is needed for clock calibration.
......@@ -239,11 +234,13 @@ static void notrace start_secondary(void *unused)
check_tsc_sync_target();
/*
* We need to hold vector_lock so there the set of online cpus
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
* Lock vector_lock and initialize the vectors on this cpu
* before setting the cpu online. We must set it online with
* vector_lock held to prevent a concurrent setup/teardown
* from seeing a half valid vector space.
*/
lock_vector_lock();
setup_vector_irq(smp_processor_id());
set_cpu_online(smp_processor_id(), true);
unlock_vector_lock();
cpu_set_state_online(smp_processor_id());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment