Commit 948a7b2b authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'irq/sparseirq' into cpus4096

Conflicts:
	arch/x86/kernel/io_apic.c

Merge irq/sparseirq here, to resolve conflicts.
parents 9466d603 17483a1f
......@@ -243,7 +243,7 @@ config X86_HAS_BOOT_CPU_ID
config SPARSE_IRQ
bool "Support sparse irq numbering"
depends on (PCI_MSI || HT_IRQ) && SMP
depends on PCI_MSI || HT_IRQ
default y
help
This enables support for sparse irq, esp for msi/msi-x. You may need
......@@ -251,6 +251,15 @@ config SPARSE_IRQ
If you don't know what to do here, say Y.
config NUMA_MIGRATE_IRQ_DESC
bool "Move irq desc when changing irq smp_affinity"
depends on SPARSE_IRQ && SMP
default n
help
This enables moving irq_desc to the cpu/node that will handle the irq.
If you don't know what to do here, say N.
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
......
......@@ -141,6 +141,9 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
u8 move_desc_pending : 1;
#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
......@@ -241,6 +244,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
}
}
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
/*
 * Give @cfg its own copy of @old_cfg's irq_2_pin list, with every node
 * obtained from get_one_free_irq_2_pin(@cpu).
 *
 * If any node cannot be allocated, the partial copy is released and @cfg
 * keeps pointing at @old_cfg's list ("still use the old one").  Sharing the
 * head is what lets free_irq_2_pin() recognise that the list must not be
 * freed together with the old irq_cfg; leaving @cfg->irq_2_pin NULL here
 * would make it free the only pin list the irq has.
 */
static void
init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
{
	struct irq_pin_list *old_entry, *entry, *next;
	struct irq_pin_list *head = NULL;
	struct irq_pin_list **link = &head;

	/* Fallback for every failure path below: share the old list. */
	cfg->irq_2_pin = old_cfg->irq_2_pin;

	for (old_entry = old_cfg->irq_2_pin; old_entry;
	     old_entry = old_entry->next) {
		entry = get_one_free_irq_2_pin(cpu);
		if (!entry)
			goto undo;

		entry->apic = old_entry->apic;
		entry->pin = old_entry->pin;
		/* keep the partial list terminated so undo can walk it */
		entry->next = NULL;

		*link = entry;
		link = &entry->next;
	}

	/* Complete copy (NULL when the old list was empty). */
	cfg->irq_2_pin = head;
	return;

undo:
	for (entry = head; entry; entry = next) {
		next = entry->next;
		kfree(entry);
	}
}
/*
 * Release every node of @old_cfg's irq_2_pin list and clear the head.
 * Nothing is freed when @old_cfg and @cfg point at the same list head.
 */
static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
{
	struct irq_pin_list *pin, *following;

	/* same head: the list is still in use through @cfg */
	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
		return;

	for (pin = old_cfg->irq_2_pin; pin; pin = following) {
		following = pin->next;
		kfree(pin);
	}

	old_cfg->irq_2_pin = NULL;
}
/*
 * Attach a fresh irq_cfg, obtained from get_one_free_irq_cfg(@cpu), to
 * @desc and fill it with a copy of @old_desc's irq_cfg, including a copy of
 * the irq_2_pin list.
 *
 * When no irq_cfg can be obtained, @desc->chip_data is left untouched.
 */
void arch_init_copy_chip_data(struct irq_desc *old_desc,
			      struct irq_desc *desc, int cpu)
{
	struct irq_cfg *src_cfg;
	struct irq_cfg *new_cfg = get_one_free_irq_cfg(cpu);

	if (!new_cfg)
		return;

	desc->chip_data = new_cfg;

	src_cfg = old_desc->chip_data;
	memcpy(new_cfg, src_cfg, sizeof(*new_cfg));

	/* the memcpy duplicated the list head pointer; build a real copy */
	init_copy_irq_2_pin(src_cfg, new_cfg, cpu);
}
/* Hand an irq_cfg that is no longer referenced back to kfree(). */
static void free_irq_cfg(struct irq_cfg *old_cfg)
{
	kfree(old_cfg);
}
/*
 * Drop the irq_cfg hanging off @old_desc, together with its irq_2_pin
 * list, unless @desc uses that very same irq_cfg.  @old_desc->chip_data is
 * cleared once the irq_cfg has been freed.
 */
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
	struct irq_cfg *stale = old_desc->chip_data;
	struct irq_cfg *live = desc->chip_data;

	/* nothing to free, or the irq_cfg is shared with @desc */
	if (!stale || stale == live)
		return;

	free_irq_2_pin(stale, live);
	free_irq_cfg(stale);
	old_desc->chip_data = NULL;
}
static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
struct irq_cfg *cfg = desc->chip_data;
if (!cfg->move_in_progress) {
/* it means that domain is not changed */
if (!cpus_intersects(desc->affinity, mask))
cfg->move_desc_pending = 1;
}
}
#endif
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
{
......@@ -249,10 +367,12 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
#endif
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
/*
 * Without CONFIG_NUMA_MIGRATE_IRQ_DESC there is no pending descriptor move
 * to record, so this is an empty stub.
 */
static inline void set_extra_move_desc(struct irq_desc *desc,
				       const struct cpumask *mask)
{
}
#endif
struct io_apic {
unsigned int index;
......@@ -2397,11 +2517,31 @@ static void irq_complete_move(struct irq_desc **descp)
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
if (likely(!cfg->move_in_progress))
if (likely(!cfg->move_in_progress)) {
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
if (likely(!cfg->move_desc_pending))
return;
/* domain is not changed, but affinity is changed */
me = smp_processor_id();
if (cpu_isset(me, desc->affinity)) {
*descp = desc = move_irq_desc(desc, me);
/* get the new one */
cfg = desc->chip_data;
cfg->move_desc_pending = 0;
}
#endif
return;
}
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
*descp = desc = move_irq_desc(desc, me);
/* get the new one */
cfg = desc->chip_data;
#endif
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
}
......
......@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <linux/intel-iommu.h>
#include "intr_remapping.h"
......
......@@ -27,7 +27,6 @@ static int show_stat(struct seq_file *p, void *v)
u64 sum = 0;
struct timespec boottime;
unsigned int per_irq_sum;
struct irq_desc *desc;
user = nice = system = idle = iowait =
irq = softirq = steal = cputime64_zero;
......@@ -47,8 +46,7 @@ static int show_stat(struct seq_file *p, void *v)
guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
for_each_irq_nr(j) {
#ifdef CONFIG_SPARSE_IRQ
desc = irq_to_desc(j);
if (!desc)
if (!irq_to_desc(j))
continue;
#endif
sum += kstat_irqs_cpu(j, i);
......@@ -98,8 +96,7 @@ static int show_stat(struct seq_file *p, void *v)
for_each_irq_nr(j) {
per_irq_sum = 0;
#ifdef CONFIG_SPARSE_IRQ
desc = irq_to_desc(j);
if (!desc) {
if (!irq_to_desc(j)) {
seq_printf(p, " %u", per_irq_sum);
continue;
}
......
......@@ -228,6 +228,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
#endif
/*
 * Return the descriptor a flow handler should continue with for @irq.
 *
 * With CONFIG_NUMA_MIGRATE_IRQ_DESC the descriptor is looked up again via
 * irq_to_desc(); otherwise @desc is handed back unchanged.
 * NOTE(review): presumably the fresh lookup is needed because a chip
 * callback may have moved the descriptor - confirm against the callers.
 */
static inline struct irq_desc *
irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
{
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
	return desc;
#else
	return irq_to_desc(irq);
#endif
}
/*
* Migration helpers for obsolete names, they will go away:
*/
......
......@@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
......@@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
desc = irq_remap_to_desc(irq, desc);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
......@@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
desc = irq_remap_to_desc(irq, desc);
spin_unlock(&desc->lock);
}
......@@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
!desc->action)) {
desc->status |= (IRQ_PENDING | IRQ_MASKED);
mask_ack_irq(desc, irq);
desc = irq_remap_to_desc(irq, desc);
goto out_unlock;
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->chip->ack(irq);
desc = irq_remap_to_desc(irq, desc);
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
......@@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
if (desc->chip->eoi)
if (desc->chip->eoi) {
desc->chip->eoi(irq);
desc = irq_remap_to_desc(irq, desc);
}
}
void
......@@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
/* Uninstall? */
if (handle == handle_bad_irq) {
if (desc->chip != &no_irq_chip)
if (desc->chip != &no_irq_chip) {
mask_ack_irq(desc, irq);
desc = irq_remap_to_desc(irq, desc);
}
desc->status |= IRQ_DISABLED;
desc->depth = 1;
}
......
......@@ -23,7 +23,7 @@
/*
* lockdep: we want to handle all irq_desc locks as a single lock-class:
*/
static struct lock_class_key irq_desc_lock_class;
struct lock_class_key irq_desc_lock_class;
/**
* handle_bad_irq - handle spurious and unhandled irqs
......@@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = {
#endif
};
static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
{
unsigned long bytes;
char *ptr;
......@@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
/*
* Protect the sparse_irqs:
*/
static DEFINE_SPINLOCK(sparse_irq_lock);
DEFINE_SPINLOCK(sparse_irq_lock);
struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
......@@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq)
/*
* No locking required for CPU-local interrupts:
*/
if (desc->chip->ack)
if (desc->chip->ack) {
desc->chip->ack(irq);
/* get new one */
desc = irq_remap_to_desc(irq, desc);
}
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
......@@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq)
}
spin_lock(&desc->lock);
if (desc->chip->ack)
if (desc->chip->ack) {
desc->chip->ack(irq);
desc = irq_remap_to_desc(irq, desc);
}
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
......
......@@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
extern spinlock_t sparse_irq_lock;
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
......
/*
* linux/kernel/irq/handle.c
*
* Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
* Copyright (C) 2005-2006, Thomas Gleixner, Russell King
*
* This file contains the core interrupt handling code.
*
* Detailed information is available in Documentation/DocBook/genericirq
*
*/
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include "internals.h"
/*
 * Set up the kstat_irqs array of @desc through init_kstat_irqs() and, when
 * that left @desc with an array distinct from @old_desc's, carry over the
 * @nr counters from the old array.
 */
static void init_copy_kstat_irqs(struct irq_desc *old_desc,
				 struct irq_desc *desc,
				 int cpu, int nr)
{
	unsigned long nbytes;

	init_kstat_irqs(desc, cpu, nr);

	/* both descriptors still use the same array: nothing to copy */
	if (desc->kstat_irqs == old_desc->kstat_irqs)
		return;

	/* one unsigned int counter per entry */
	nbytes = nr * sizeof(unsigned int);
	memcpy(desc->kstat_irqs, old_desc->kstat_irqs, nbytes);
}
/*
 * Free @old_desc's kstat_irqs array and clear the pointer, unless @desc
 * uses the same array.
 */
static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
{
	if (old_desc->kstat_irqs != desc->kstat_irqs) {
		kfree(old_desc->kstat_irqs);
		old_desc->kstat_irqs = NULL;
	}
}
/*
 * Initialise @desc as a copy of @old_desc that belongs to @cpu: the whole
 * structure is duplicated first, then the cpu, the lock class, the
 * kstat_irqs array and the arch chip data are set up for the new copy.
 *
 * NOTE(review): the memcpy also duplicates the contents of desc->lock as
 * they are in @old_desc - confirm that callers expect this.
 */
static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
				   struct irq_desc *desc, int cpu)
{
	memcpy(desc, old_desc, sizeof(*desc));

	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
	desc->cpu = cpu;

	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
	arch_init_copy_chip_data(old_desc, desc, cpu);
}
/*
 * Release what @old_desc owns and @desc does not share: first the
 * kstat_irqs array, then the arch chip data.  The descriptor itself is not
 * freed here.
 */
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
{
	free_kstat_irqs(old_desc, desc);
	arch_free_chip_data(old_desc, desc);
}
/*
 * Replace @old_desc by a copy allocated on the node of @cpu.
 *
 * Runs under sparse_irq_lock.  Returns:
 *  - the descriptor currently installed in irq_desc_ptrs[] if it is no
 *    longer @old_desc (another CPU got there first),
 *  - @old_desc if no memory for the copy is available,
 *  - otherwise the new copy, after it has been installed in
 *    irq_desc_ptrs[] and @old_desc has been freed.
 */
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
					     int cpu)
{
	unsigned int irq = old_desc->irq;
	struct irq_desc *moved;
	unsigned long flags;
	int node;

	spin_lock_irqsave(&sparse_irq_lock, flags);

	/* We have to check it to avoid races with another CPU */
	moved = irq_desc_ptrs[irq];
	if (moved && moved != old_desc)
		goto unlock;

	node = cpu_to_node(cpu);
	moved = kzalloc_node(sizeof(*moved), GFP_ATOMIC, node);
	printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
		irq, cpu, node);

	if (moved) {
		init_copy_one_irq_desc(irq, old_desc, moved, cpu);
		irq_desc_ptrs[irq] = moved;

		/* the copy is installed: the old descriptor can go */
		free_one_irq_desc(old_desc, moved);
		kfree(old_desc);
	} else {
		printk(KERN_ERR "can not get new irq_desc for moving\n");
		/* keep working with the old descriptor */
		moved = old_desc;
	}

unlock:
	spin_unlock_irqrestore(&sparse_irq_lock, flags);

	return moved;
}
/*
 * Re-home @desc for @cpu and return the descriptor to use afterwards.
 *
 * Legacy irqs (irq < NR_IRQS_LEGACY) are static and are never moved.  When
 * @cpu is on the same node as the descriptor's current cpu, only desc->cpu
 * is updated; when the nodes differ, __real_move_irq_desc() is called and
 * its result (possibly a different descriptor) is returned.
 */
struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
{
	int from_cpu;

	/* static descriptors stay where they are */
	if (desc->irq < NR_IRQS_LEGACY)
		return desc;

	from_cpu = desc->cpu;
	printk(KERN_DEBUG
		"try to move irq_desc from cpu %d to %d\n", from_cpu, cpu);

	if (from_cpu == cpu)
		return desc;

	if (cpu_to_node(from_cpu) == cpu_to_node(cpu)) {
		/* same node: just record the new cpu */
		desc->cpu = cpu;
		return desc;
	}

	return __real_move_irq_desc(desc, cpu);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment