Commit ced5b697, authored by Brandon Philips, committed by H. Peter Anvin

x86: Avoid race condition in pci_enable_msix()

Keep chip_data in create_irq_nr and destroy_irq.

When two drivers are setting up MSI-X at the same time via
pci_enable_msix() there is a race.  See this dmesg excerpt:

[   85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[   85.170611]   alloc irq_desc for 99 on node -1
[   85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[   85.170614]   alloc kstat_irqs on node -1
[   85.170616] alloc irq_2_iommu on node -1
[   85.170617]   alloc irq_desc for 100 on node -1
[   85.170619]   alloc kstat_irqs on node -1
[   85.170621] alloc irq_2_iommu on node -1
[   85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[   85.170626]   alloc irq_desc for 101 on node -1
[   85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[   85.170630]   alloc kstat_irqs on node -1
[   85.170631] alloc irq_2_iommu on node -1
[   85.170635]   alloc irq_desc for 102 on node -1
[   85.170636]   alloc kstat_irqs on node -1
[   85.170639] alloc irq_2_iommu on node -1
[   85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088

As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function.

ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr(), ixgbe
chooses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init(), which sets irq_desc_ptrs[102]->chip_data =
NULL.

igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:

	cfg_new = irq_desc_ptrs[102]->chip_data;
	if (cfg_new->vector != 0)
		continue;

This hits the NULL deref.

Another possible race exists via pci_disable_msix() in a driver, or in
any of the error paths that call free_msi_irqs():

destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;

Remove the save and restore code for cfg in create_irq_nr() and
destroy_irq() and take the desc->lock when checking the irq_cfg.

Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org>
Signed-off-by: Brandon Philips <bphilips@suse.de>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent e28cab42
...@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) ...@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
} }
spin_unlock_irqrestore(&vector_lock, flags); spin_unlock_irqrestore(&vector_lock, flags);
if (irq > 0) { if (irq > 0)
dynamic_irq_init(irq); dynamic_irq_init_keep_chip_data(irq);
/* restore it, in case dynamic_irq_init clear it */
if (desc_new)
desc_new->chip_data = cfg_new;
}
return irq; return irq;
} }
...@@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) ...@@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq)
{ {
unsigned long flags; unsigned long flags;
struct irq_cfg *cfg; struct irq_cfg *cfg;
struct irq_desc *desc;
/* store it, in case dynamic_irq_cleanup clear it */ dynamic_irq_cleanup_keep_chip_data(irq);
desc = irq_to_desc(irq);
cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
/* connect back irq_cfg */
desc->chip_data = cfg;
free_irte(irq); free_irte(irq);
spin_lock_irqsave(&vector_lock, flags); spin_lock_irqsave(&vector_lock, flags);
cfg = irq_to_desc(irq)->chip_data;
__clear_irq_vector(irq, cfg); __clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags); spin_unlock_irqrestore(&vector_lock, flags);
} }
......
...@@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq) ...@@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq)
/* Dynamic irq helper functions */ /* Dynamic irq helper functions */
extern void dynamic_irq_init(unsigned int irq); extern void dynamic_irq_init(unsigned int irq);
void dynamic_irq_init_keep_chip_data(unsigned int irq);
extern void dynamic_irq_cleanup(unsigned int irq); extern void dynamic_irq_cleanup(unsigned int irq);
void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
/* Set/get chip/data for an IRQ: */ /* Set/get chip/data for an IRQ: */
extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
......
...@@ -18,11 +18,7 @@ ...@@ -18,11 +18,7 @@
#include "internals.h" #include "internals.h"
/** static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
* dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize
*/
void dynamic_irq_init(unsigned int irq)
{ {
struct irq_desc *desc; struct irq_desc *desc;
unsigned long flags; unsigned long flags;
...@@ -41,6 +37,7 @@ void dynamic_irq_init(unsigned int irq) ...@@ -41,6 +37,7 @@ void dynamic_irq_init(unsigned int irq)
desc->depth = 1; desc->depth = 1;
desc->msi_desc = NULL; desc->msi_desc = NULL;
desc->handler_data = NULL; desc->handler_data = NULL;
if (!keep_chip_data)
desc->chip_data = NULL; desc->chip_data = NULL;
desc->action = NULL; desc->action = NULL;
desc->irq_count = 0; desc->irq_count = 0;
...@@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) ...@@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq)
} }
/** /**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq * dynamic_irq_init - initialize a dynamically allocated irq
* @irq: irq number to initialize * @irq: irq number to initialize
*/ */
void dynamic_irq_cleanup(unsigned int irq) void dynamic_irq_init(unsigned int irq)
{
dynamic_irq_init_x(irq, false);
}
/**
* dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq
* @irq: irq number to initialize
*
* does not set irq_to_desc(irq)->chip_data to NULL
*/
void dynamic_irq_init_keep_chip_data(unsigned int irq)
{
dynamic_irq_init_x(irq, true);
}
static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data)
{ {
struct irq_desc *desc = irq_to_desc(irq); struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags; unsigned long flags;
...@@ -77,6 +90,7 @@ void dynamic_irq_cleanup(unsigned int irq) ...@@ -77,6 +90,7 @@ void dynamic_irq_cleanup(unsigned int irq)
} }
desc->msi_desc = NULL; desc->msi_desc = NULL;
desc->handler_data = NULL; desc->handler_data = NULL;
if (!keep_chip_data)
desc->chip_data = NULL; desc->chip_data = NULL;
desc->handle_irq = handle_bad_irq; desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip; desc->chip = &no_irq_chip;
...@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) ...@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq)
raw_spin_unlock_irqrestore(&desc->lock, flags); raw_spin_unlock_irqrestore(&desc->lock, flags);
} }
/**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq
* @irq: irq number to clean up
*/
void dynamic_irq_cleanup(unsigned int irq)
{
dynamic_irq_cleanup_x(irq, false);
}
/**
* dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
* @irq: irq number to clean up
*
* does not set irq_to_desc(irq)->chip_data to NULL
*/
void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
{
dynamic_irq_cleanup_x(irq, true);
}
/** /**
* set_irq_chip - set the irq chip for an irq * set_irq_chip - set the irq chip for an irq
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment