Commit 9e58df97 authored by Linus Torvalds

Merge tag 'irq-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner:
 "Updates for the interrupt subsystem:

  Core:

   - Move the interrupt affinity spreading mechanism into lib/group_cpus
     so it can be used for similar spreading requirements, e.g. in the
     block multi-queue code

     This also contains a first use case in the block multi-queue code,
     which Jens asked to take along with the move into a library

   - Improve irqdomain locking to close a number of race conditions which
     can be observed with massively parallel device driver probing

   - Enforce and document the semantics of disable_irq(), which cannot be
     invoked safely from non-sleepable context

   - Move the IPI multiplexing code from the Apple AIC driver into the
     core, so it can be reused by RISC-V (a short usage sketch follows
     this summary)

  Drivers:

   - Plug OF node refcounting leaks in various drivers

   - Correctly mark level-triggered interrupts in the Broadcom L2
     drivers

   - The usual small fixes and improvements

   - No new drivers for the record!"
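
For illustration, a minimal sketch of how an irqchip driver can consume the
new IPI multiplexing helpers (ipi_mux_create() and ipi_mux_process(), added
in kernel/irq/ipi-mux.c below and enabled by selecting GENERIC_IRQ_IPI_MUX).
All my_* names are hypothetical; only the two ipi_mux_*() calls are the real
API:

	#include <linux/irq.h>

	/* Driver hook: trigger the single hardware IPI targeting @cpu. */
	static void my_ipi_send(unsigned int cpu)		/* hypothetical */
	{
		/* poke the controller's IPI-send register for @cpu */
	}

	static int __init my_smp_ipi_init(void)			/* hypothetical */
	{
		int first_virq;

		/* Multiplex 8 virtual IPIs over the one hardware IPI. */
		first_virq = ipi_mux_create(8, my_ipi_send);
		if (first_virq <= 0)
			return -ENODEV;

		/* hand [first_virq, first_virq + 8) to the arch SMP code */
		return 0;
	}

	/* From the hardware IPI handler, after acking the parent interrupt: */
	static void my_hw_ipi_handler(void)			/* hypothetical */
	{
		ipi_mux_process();	/* dispatch all pending virtual IPIs */
	}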

* tag 'irq-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (42 commits)
  irqchip/irq-bcm7120-l2: Set IRQ_LEVEL for level triggered interrupts
  irqchip/irq-brcmstb-l2: Set IRQ_LEVEL for level triggered interrupts
  irqdomain: Switch to per-domain locking
  irqchip/mvebu-odmi: Use irq_domain_create_hierarchy()
  irqchip/loongson-pch-msi: Use irq_domain_create_hierarchy()
  irqchip/gic-v3-mbi: Use irq_domain_create_hierarchy()
  irqchip/gic-v3-its: Use irq_domain_create_hierarchy()
  irqchip/gic-v2m: Use irq_domain_create_hierarchy()
  irqchip/alpine-msi: Use irq_domain_add_hierarchy()
  x86/uv: Use irq_domain_create_hierarchy()
  x86/ioapic: Use irq_domain_create_hierarchy()
  irqdomain: Clean up irq_domain_push/pop_irq()
  irqdomain: Drop leftover brackets
  irqdomain: Drop dead domain-name assignment
  irqdomain: Drop revmap mutex
  irqdomain: Fix domain registration race
  irqdomain: Fix mapping-creation race
  irqdomain: Refactor __irq_domain_alloc_irqs()
  irqdomain: Look for existing mapping only once
  irqdomain: Drop bogus fwspec-mapping error handling
  ...
parents 560b8030 6f3ee0e2
@@ -1277,11 +1277,11 @@ Manfred Spraul points out that you can still do this, even if the data
 is very occasionally accessed in user context or softirqs/tasklets. The
 irq handler doesn't use a lock, and all other accesses are done as so::
 
-        spin_lock(&lock);
+        mutex_lock(&lock);
         disable_irq(irq);
         ...
         enable_irq(irq);
-        spin_unlock(&lock);
+        mutex_unlock(&lock);
 
 The disable_irq() prevents the irq handler from running
 (and waits for it to finish if it's currently running on other CPUs).
...
@@ -1307,11 +1307,11 @@ se i dati vengono occasionalmente utilizzati da un contesto utente o
 da un'interruzione software. Il gestore d'interruzione non utilizza alcun
 *lock*, e tutti gli altri accessi verranno fatti così::
 
-        spin_lock(&lock);
+        mutex_lock(&lock);
         disable_irq(irq);
         ...
         enable_irq(irq);
-        spin_unlock(&lock);
+        mutex_unlock(&lock);
 
 La funzione disable_irq() impedisce al gestore d'interruzioni
 d'essere eseguito (e aspetta che finisca nel caso fosse in esecuzione su
...
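
The two documentation hunks above switch the example from a spinlock to a
mutex because disable_irq() may sleep while it waits for a running handler to
finish. A minimal sketch of the full pattern, with hypothetical my_* names,
assuming the interrupt handler itself touches the data without taking any
lock:

	#include <linux/interrupt.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(my_lock);		/* hypothetical */
	static unsigned long my_counter;	/* data shared with the handler */

	static irqreturn_t my_irq_handler(int irq, void *dev_id)
	{
		/* No lock here: writers keep the handler quiescent instead. */
		my_counter++;
		return IRQ_HANDLED;
	}

	static void my_reset_counter(int irq)
	{
		mutex_lock(&my_lock);	/* must be a sleeping lock ...     */
		disable_irq(irq);	/* ... because this call can sleep */
		my_counter = 0;
		enable_irq(irq);
		mutex_unlock(&my_lock);
	}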
@@ -10777,6 +10777,8 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
 F:	kernel/irq/
+F:	include/linux/group_cpus.h
+F:	lib/group_cpus.c
 
 IRQCHIP DRIVERS
 M:	Thomas Gleixner <tglx@linutronix.de>
...
@@ -2364,9 +2364,8 @@ static int mp_irqdomain_create(int ioapic)
 		return -ENODEV;
 	}
 
-	ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
-						 (void *)(long)ioapic);
+	ip->irqdomain = irq_domain_create_hierarchy(parent, 0, hwirqs, fn, cfg->ops,
+						    (void *)(long)ioapic);
 	if (!ip->irqdomain) {
 		/* Release fw handle if it was allocated above */
 		if (!cfg->dev)
@@ -2374,8 +2373,6 @@ static int mp_irqdomain_create(int ioapic)
 		return -ENOMEM;
 	}
 
-	ip->irqdomain->parent = parent;
-
 	if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
 	    cfg->type == IOAPIC_DOMAIN_STRICT)
 		ioapic_dynirq_base = max(ioapic_dynirq_base,
...
@@ -166,10 +166,9 @@ static struct irq_domain *uv_get_irq_domain(void)
 	if (!fn)
 		goto out;
 
-	uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL);
-	if (uv_domain)
-		uv_domain->parent = x86_vector_domain;
-	else
+	uv_domain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, fn,
+						&uv_domain_ops, NULL);
+	if (!uv_domain)
 		irq_domain_free_fwnode(fn);
 out:
 	mutex_unlock(&uv_lock);
...
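
The two x86 hunks above (and most of the irqchip conversions below) follow
the same pattern: instead of creating a domain and then assigning its
->parent pointer by hand, the parent is handed to irq_domain_create_hierarchy()
up front, which the new per-domain locking relies on. A schematic before/after
sketch, not taken verbatim from any one driver (my_* names are hypothetical):

	/* Before: create the domain, then patch in the parent afterwards. */
	d = irq_domain_create_tree(fwnode, &my_domain_ops, priv);
	if (d)
		d->parent = parent_domain;

	/* After: parent, flags and size are fixed at creation time.   */
	/* A size of 0 selects the tree (radix) reverse map as before. */
	d = irq_domain_create_hierarchy(parent_domain, 0, 0,
					fwnode, &my_domain_ops, priv);
	if (!d)
		return -ENOMEM;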
@@ -10,66 +10,29 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <linux/group_cpus.h>
 #include <linux/blk-mq.h>
 #include "blk.h"
 #include "blk-mq.h"
 
-static int queue_index(struct blk_mq_queue_map *qmap,
-		       unsigned int nr_queues, const int q)
-{
-	return qmap->queue_offset + (q % nr_queues);
-}
-
-static int get_first_sibling(unsigned int cpu)
-{
-	unsigned int ret;
-
-	ret = cpumask_first(topology_sibling_cpumask(cpu));
-	if (ret < nr_cpu_ids)
-		return ret;
-
-	return cpu;
-}
-
 void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
 {
-	unsigned int *map = qmap->mq_map;
-	unsigned int nr_queues = qmap->nr_queues;
-	unsigned int cpu, first_sibling, q = 0;
-
-	for_each_possible_cpu(cpu)
-		map[cpu] = -1;
-
-	/*
-	 * Spread queues among present CPUs first for minimizing
-	 * count of dead queues which are mapped by all un-present CPUs
-	 */
-	for_each_present_cpu(cpu) {
-		if (q >= nr_queues)
-			break;
-		map[cpu] = queue_index(qmap, nr_queues, q++);
-	}
+	const struct cpumask *masks;
+	unsigned int queue, cpu;
 
-	for_each_possible_cpu(cpu) {
-		if (map[cpu] != -1)
-			continue;
-		/*
-		 * First do sequential mapping between CPUs and queues.
-		 * In case we still have CPUs to map, and we have some number of
-		 * threads per cores then map sibling threads to the same queue
-		 * for performance optimizations.
-		 */
-		if (q < nr_queues) {
-			map[cpu] = queue_index(qmap, nr_queues, q++);
-		} else {
-			first_sibling = get_first_sibling(cpu);
-			if (first_sibling == cpu)
-				map[cpu] = queue_index(qmap, nr_queues, q++);
-			else
-				map[cpu] = map[first_sibling];
-		}
+	masks = group_cpus_evenly(qmap->nr_queues);
+	if (!masks) {
+		for_each_possible_cpu(cpu)
+			qmap->mq_map[cpu] = qmap->queue_offset;
+		return;
 	}
+
+	for (queue = 0; queue < qmap->nr_queues; queue++) {
+		for_each_cpu(cpu, &masks[queue])
+			qmap->mq_map[cpu] = qmap->queue_offset + queue;
+	}
+	kfree(masks);
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
...
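
The blk-mq hunk above is the first user of the spreading code now living in
lib/group_cpus. A minimal sketch of the same pattern for any other
queue-to-CPU mapping (my_map_queues() and its arguments are hypothetical; the
group_cpus_evenly() contract is as declared in include/linux/group_cpus.h
below):

	#include <linux/group_cpus.h>
	#include <linux/slab.h>

	/* Spread nr_queues queues evenly over the possible CPUs. */
	static int my_map_queues(unsigned int *cpu_to_queue, unsigned int nr_queues)
	{
		struct cpumask *masks = group_cpus_evenly(nr_queues);
		unsigned int grp, cpu;

		if (!masks)
			return -ENOMEM;	/* caller may fall back to a trivial map */

		/* masks[0..nr_queues-1] group the possible CPUs evenly */
		for (grp = 0; grp < nr_queues; grp++)
			for_each_cpu(cpu, &masks[grp])
				cpu_to_queue[cpu] = grp;

		kfree(masks);		/* the caller owns the returned array */
		return 0;
	}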
@@ -384,7 +384,7 @@ config LS_EXTIRQ
 
 config LS_SCFG_MSI
 	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
-	depends on PCI && PCI_MSI
+	depends on PCI_MSI
 
 config PARTITION_PERCPU
 	bool
@@ -653,6 +653,7 @@ config APPLE_AIC
 	bool "Apple Interrupt Controller (AIC)"
 	depends on ARM64
 	depends on ARCH_APPLE || COMPILE_TEST
+	select GENERIC_IRQ_IPI_MUX
 	help
 	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
 	  such as the M1.
...
@@ -199,12 +199,13 @@ static int alpine_msix_init_domains(struct alpine_msix_data *priv,
 	}
 
 	gic_domain = irq_find_host(gic_node);
+	of_node_put(gic_node);
 	if (!gic_domain) {
 		pr_err("Failed to find the GIC domain\n");
 		return -ENXIO;
 	}
 
-	middle_domain = irq_domain_add_tree(NULL,
-					    &alpine_msix_middle_domain_ops,
-					    priv);
+	middle_domain = irq_domain_add_hierarchy(gic_domain, 0, 0, NULL,
+						 &alpine_msix_middle_domain_ops,
+						 priv);
 	if (!middle_domain) {
@@ -212,8 +213,6 @@ static int alpine_msix_init_domains(struct alpine_msix_data *priv,
 		return -ENOMEM;
 	}
 
-	middle_domain->parent = gic_domain;
-
 	msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node),
 					       &alpine_msix_domain_info,
 					       middle_domain);
...
@@ -292,7 +292,6 @@ struct aic_irq_chip {
 	void __iomem	*base;
 	void __iomem	*event;
 	struct irq_domain	*hw_domain;
-	struct irq_domain	*ipi_domain;
 	struct {
 		cpumask_t aff;
 	} *fiq_aff[AIC_NR_FIQ];
@@ -307,9 +306,6 @@ struct aic_irq_chip {
 
 static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked);
 
-static DEFINE_PER_CPU(atomic_t, aic_vipi_flag);
-static DEFINE_PER_CPU(atomic_t, aic_vipi_enable);
-
 static struct aic_irq_chip *aic_irqc;
 
 static void aic_handle_ipi(struct pt_regs *regs);
@@ -751,98 +747,8 @@ static void aic_ipi_send_fast(int cpu)
 	isb();
 }
 
static void aic_ipi_mask(struct irq_data *d)
{
u32 irq_bit = BIT(irqd_to_hwirq(d));
/* No specific ordering requirements needed here. */
atomic_andnot(irq_bit, this_cpu_ptr(&aic_vipi_enable));
}
static void aic_ipi_unmask(struct irq_data *d)
{
struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
u32 irq_bit = BIT(irqd_to_hwirq(d));
atomic_or(irq_bit, this_cpu_ptr(&aic_vipi_enable));
/*
* The atomic_or() above must complete before the atomic_read()
* below to avoid racing aic_ipi_send_mask().
*/
smp_mb__after_atomic();
/*
* If a pending vIPI was unmasked, raise a HW IPI to ourselves.
* No barriers needed here since this is a self-IPI.
*/
if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) {
if (static_branch_likely(&use_fast_ipi))
aic_ipi_send_fast(smp_processor_id());
else
aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id()));
}
}
static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
{
struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d);
u32 irq_bit = BIT(irqd_to_hwirq(d));
u32 send = 0;
int cpu;
unsigned long pending;
for_each_cpu(cpu, mask) {
/*
* This sequence is the mirror of the one in aic_ipi_unmask();
* see the comment there. Additionally, release semantics
* ensure that the vIPI flag set is ordered after any shared
* memory accesses that precede it. This therefore also pairs
* with the atomic_fetch_andnot in aic_handle_ipi().
*/
pending = atomic_fetch_or_release(irq_bit, per_cpu_ptr(&aic_vipi_flag, cpu));
/*
* The atomic_fetch_or_release() above must complete before the
* atomic_read() below to avoid racing aic_ipi_unmask().
*/
smp_mb__after_atomic();
if (!(pending & irq_bit) &&
(atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) {
if (static_branch_likely(&use_fast_ipi))
aic_ipi_send_fast(cpu);
else
send |= AIC_IPI_SEND_CPU(cpu);
}
}
/*
* The flag writes must complete before the physical IPI is issued
* to another CPU. This is implied by the control dependency on
* the result of atomic_read_acquire() above, which is itself
* already ordered after the vIPI flag write.
*/
if (send)
aic_ic_write(ic, AIC_IPI_SEND, send);
}
static struct irq_chip ipi_chip = {
.name = "AIC-IPI",
.irq_mask = aic_ipi_mask,
.irq_unmask = aic_ipi_unmask,
.ipi_send_mask = aic_ipi_send_mask,
};
/*
* IPI IRQ domain
*/
 static void aic_handle_ipi(struct pt_regs *regs)
 {
-	int i;
-	unsigned long enabled, firing;
-
 	/*
 	 * Ack the IPI. We need to order this after the AIC event read, but
 	 * that is enforced by normal MMIO ordering guarantees.
@@ -857,27 +763,7 @@ static void aic_handle_ipi(struct pt_regs *regs)
 		aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER);
 	}
 
-	/*
-	 * The mask read does not need to be ordered. Only we can change
-	 * our own mask anyway, so no races are possible here, as long as
-	 * we are properly in the interrupt handler (which is covered by
-	 * the barrier that is part of the top-level AIC handler's readl()).
-	 */
-	enabled = atomic_read(this_cpu_ptr(&aic_vipi_enable));
-
-	/*
-	 * Clear the IPIs we are about to handle. This pairs with the
-	 * atomic_fetch_or_release() in aic_ipi_send_mask(), and needs to be
-	 * ordered after the aic_ic_write() above (to avoid dropping vIPIs) and
-	 * before IPI handling code (to avoid races handling vIPIs before they
-	 * are signaled). The former is taken care of by the release semantics
-	 * of the write portion, while the latter is taken care of by the
-	 * acquire semantics of the read portion.
-	 */
-	firing = atomic_fetch_andnot(enabled, this_cpu_ptr(&aic_vipi_flag)) & enabled;
-
-	for_each_set_bit(i, &firing, AIC_NR_SWIPI)
-		generic_handle_domain_irq(aic_irqc->ipi_domain, i);
+	ipi_mux_process();
 
 	/*
 	 * No ordering needed here; at worst this just changes the timing of
@@ -887,55 +773,24 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER);
 }
 
-static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq,
-			 unsigned int nr_irqs, void *args)
-{
-	int i;
-
-	for (i = 0; i < nr_irqs; i++) {
-		irq_set_percpu_devid(virq + i);
-		irq_domain_set_info(d, virq + i, i, &ipi_chip, d->host_data,
-				    handle_percpu_devid_irq, NULL, NULL);
-	}
-
-	return 0;
-}
-
-static void aic_ipi_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
+static void aic_ipi_send_single(unsigned int cpu)
 {
-	/* Not freeing IPIs */
+	if (static_branch_likely(&use_fast_ipi))
+		aic_ipi_send_fast(cpu);
+	else
+		aic_ic_write(aic_irqc, AIC_IPI_SEND, AIC_IPI_SEND_CPU(cpu));
 }
 
-static const struct irq_domain_ops aic_ipi_domain_ops = {
-	.alloc = aic_ipi_alloc,
-	.free = aic_ipi_free,
-};
-
 static int __init aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node)
 {
-	struct irq_domain *ipi_domain;
 	int base_ipi;
 
-	ipi_domain = irq_domain_create_linear(irqc->hw_domain->fwnode, AIC_NR_SWIPI,
-					      &aic_ipi_domain_ops, irqc);
-	if (WARN_ON(!ipi_domain))
+	base_ipi = ipi_mux_create(AIC_NR_SWIPI, aic_ipi_send_single);
+	if (WARN_ON(base_ipi <= 0))
 		return -ENODEV;
 
-	ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
-	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
-
-	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, AIC_NR_SWIPI,
-					   NUMA_NO_NODE, NULL, false, NULL);
-	if (WARN_ON(!base_ipi)) {
-		irq_domain_remove(ipi_domain);
-		return -ENODEV;
-	}
-
 	set_smp_ipi_range(base_ipi, AIC_NR_SWIPI);
 
-	irqc->ipi_domain = ipi_domain;
-
 	return 0;
 }
...
@@ -454,8 +454,7 @@ static __init void armada_xp_ipi_init(struct device_node *node)
 		return;
 
 	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
-	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, IPI_DOORBELL_END,
-					   NUMA_NO_NODE, NULL, false, NULL);
+	base_ipi = irq_domain_alloc_irqs(ipi_domain, IPI_DOORBELL_END, NUMA_NO_NODE, NULL);
 	if (WARN_ON(!base_ipi))
 		return;
...
@@ -17,8 +17,9 @@
 
 #define ASPEED_SCU_IC_REG		0x018
 #define ASPEED_SCU_IC_SHIFT		0
-#define ASPEED_SCU_IC_ENABLE		GENMASK(6, ASPEED_SCU_IC_SHIFT)
+#define ASPEED_SCU_IC_ENABLE		GENMASK(15, ASPEED_SCU_IC_SHIFT)
 #define ASPEED_SCU_IC_NUM_IRQS		7
+#define ASPEED_SCU_IC_STATUS		GENMASK(28, 16)
 #define ASPEED_SCU_IC_STATUS_SHIFT	16
 
 #define ASPEED_AST2600_SCU_IC0_REG	0x560
@@ -155,6 +156,8 @@ static int aspeed_scu_ic_of_init_common(struct aspeed_scu_ic *scu_ic,
 		rc = PTR_ERR(scu_ic->scu);
 		goto err;
 	}
+	regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_STATUS, ASPEED_SCU_IC_STATUS);
+	regmap_write_bits(scu_ic->scu, scu_ic->reg, ASPEED_SCU_IC_ENABLE, 0);
 
 	irq = irq_of_parse_and_map(node, 0);
 	if (!irq) {
...
@@ -268,10 +268,7 @@ static void __init bcm2836_arm_irqchip_smp_init(void)
 	ipi_domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
 	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
 
-	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, BITS_PER_MBOX,
-					   NUMA_NO_NODE, NULL,
-					   false, NULL);
+	base_ipi = irq_domain_alloc_irqs(ipi_domain, BITS_PER_MBOX, NUMA_NO_NODE, NULL);
 	if (WARN_ON(!base_ipi))
 		return;
...
@@ -279,7 +279,8 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
 		flags |= IRQ_GC_BE_IO;
 
 	ret = irq_alloc_domain_generic_chips(data->domain, IRQS_PER_WORD, 1,
-			dn->full_name, handle_level_irq, clr, 0, flags);
+			dn->full_name, handle_level_irq, clr,
+			IRQ_LEVEL, flags);
 	if (ret) {
 		pr_err("failed to allocate generic irq chip\n");
 		goto out_free_domain;
...
@@ -161,6 +161,7 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
 							*init_params)
 {
 	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+	unsigned int set = 0;
 	struct brcmstb_l2_intc_data *data;
 	struct irq_chip_type *ct;
 	int ret;
@@ -208,9 +209,12 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np,
 	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 		flags |= IRQ_GC_BE_IO;
 
+	if (init_params->handler == handle_level_irq)
+		set |= IRQ_LEVEL;
+
 	/* Allocate a single Generic IRQ chip for this node */
 	ret = irq_alloc_domain_generic_chips(data->domain, 32, 1,
-			np->full_name, init_params->handler, clr, 0, flags);
+			np->full_name, init_params->handler, clr, set, flags);
 	if (ret) {
 		pr_err("failed to allocate generic irq chip\n");
 		goto out_free_domain;
...
@@ -287,7 +287,7 @@ static __init int gicv2m_allocate_domains(struct irq_domain *parent)
 	if (!v2m)
 		return 0;
 
-	inner_domain = irq_domain_create_tree(v2m->fwnode,
-					      &gicv2m_domain_ops, v2m);
+	inner_domain = irq_domain_create_hierarchy(parent, 0, 0, v2m->fwnode,
+						   &gicv2m_domain_ops, v2m);
 	if (!inner_domain) {
 		pr_err("Failed to create GICv2m domain\n");
@@ -295,7 +295,6 @@ static __init int gicv2m_allocate_domains(struct irq_domain *parent)
 	}
 
 	irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
-	inner_domain->parent = parent;
 	pci_domain = pci_msi_create_irq_domain(v2m->fwnode,
 					       &gicv2m_msi_domain_info,
 					       inner_domain);
...
@@ -4909,18 +4909,19 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
 	if (!info)
 		return -ENOMEM;
 
-	inner_domain = irq_domain_create_tree(handle, &its_domain_ops, its);
+	info->ops = &its_msi_domain_ops;
+	info->data = its;
+
+	inner_domain = irq_domain_create_hierarchy(its_parent,
+						   its->msi_domain_flags, 0,
+						   handle, &its_domain_ops,
+						   info);
 	if (!inner_domain) {
 		kfree(info);
 		return -ENOMEM;
 	}
 
-	inner_domain->parent = its_parent;
 	irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
-	inner_domain->flags |= its->msi_domain_flags;
-	info->ops = &its_msi_domain_ops;
-	info->data = its;
-	inner_domain->host_data = info;
 
 	return 0;
 }
...
@@ -233,13 +233,12 @@ static int mbi_allocate_domains(struct irq_domain *parent)
 	struct irq_domain *nexus_domain, *pci_domain, *plat_domain;
 	int err;
 
-	nexus_domain = irq_domain_create_tree(parent->fwnode,
-					      &mbi_domain_ops, NULL);
+	nexus_domain = irq_domain_create_hierarchy(parent, 0, 0, parent->fwnode,
+						   &mbi_domain_ops, NULL);
 	if (!nexus_domain)
 		return -ENOMEM;
 
 	irq_domain_update_bus_token(nexus_domain, DOMAIN_BUS_NEXUS);
-	nexus_domain->parent = parent;
 
 	err = mbi_allocate_pci_domain(nexus_domain, &pci_domain);
...
@@ -1310,9 +1310,7 @@ static void __init gic_smp_init(void)
 				  gic_starting_cpu, NULL);
 
 	/* Register all 8 non-secure SGIs */
-	base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8,
-					   NUMA_NO_NODE, &sgi_fwspec,
-					   false, NULL);
+	base_sgi = irq_domain_alloc_irqs(gic_data.domain, 8, NUMA_NO_NODE, &sgi_fwspec);
 	if (WARN_ON(base_sgi <= 0))
 		return;
...
@@ -139,9 +139,7 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx)
 	if (!vpe->sgi_domain)
 		goto err;
 
-	sgi_base = __irq_domain_alloc_irqs(vpe->sgi_domain, -1, 16,
-					   NUMA_NO_NODE, vpe,
-					   false, NULL);
+	sgi_base = irq_domain_alloc_irqs(vpe->sgi_domain, 16, NUMA_NO_NODE, vpe);
 	if (sgi_base <= 0)
 		goto err;
 
@@ -176,9 +174,8 @@ int its_alloc_vcpu_irqs(struct its_vm *vm)
 		vm->vpes[i]->idai = true;
 	}
 
-	vpe_base_irq = __irq_domain_alloc_irqs(vm->domain, -1, vm->nr_vpes,
-					       NUMA_NO_NODE, vm,
-					       false, NULL);
+	vpe_base_irq = irq_domain_alloc_irqs(vm->domain, vm->nr_vpes,
+					     NUMA_NO_NODE, vm);
 	if (vpe_base_irq <= 0)
 		goto err;
...
@@ -868,9 +868,7 @@ static __init void gic_smp_init(void)
 				  "irqchip/arm/gic:starting",
 				  gic_starting_cpu, NULL);
 
-	base_sgi = __irq_domain_alloc_irqs(gic_data[0].domain, -1, 8,
-					   NUMA_NO_NODE, &sgi_fwspec,
-					   false, NULL);
+	base_sgi = irq_domain_alloc_irqs(gic_data[0].domain, 8, NUMA_NO_NODE, &sgi_fwspec);
 	if (WARN_ON(base_sgi <= 0))
 		return;
...
@@ -55,6 +55,8 @@ struct liointc_priv {
 	struct liointc_handler_data	handler[LIOINTC_NUM_PARENT];
 	void __iomem			*core_isr[LIOINTC_NUM_CORES];
 	u8				map_cache[LIOINTC_CHIP_IRQ];
+	u32				int_pol;
+	u32				int_edge;
 	bool				has_lpc_irq_errata;
 };
 
@@ -138,6 +140,14 @@ static int liointc_set_type(struct irq_data *data, unsigned int type)
 	return 0;
 }
 
+static void liointc_suspend(struct irq_chip_generic *gc)
+{
+	struct liointc_priv *priv = gc->private;
+
+	priv->int_pol = readl(gc->reg_base + LIOINTC_REG_INTC_POL);
+	priv->int_edge = readl(gc->reg_base + LIOINTC_REG_INTC_EDGE);
+}
+
 static void liointc_resume(struct irq_chip_generic *gc)
 {
 	struct liointc_priv *priv = gc->private;
@@ -150,6 +160,8 @@ static void liointc_resume(struct irq_chip_generic *gc)
 	/* Restore map cache */
 	for (i = 0; i < LIOINTC_CHIP_IRQ; i++)
 		writeb(priv->map_cache[i], gc->reg_base + i);
+	writel(priv->int_pol, gc->reg_base + LIOINTC_REG_INTC_POL);
+	writel(priv->int_edge, gc->reg_base + LIOINTC_REG_INTC_EDGE);
 	/* Restore mask cache */
 	writel(gc->mask_cache, gc->reg_base + LIOINTC_REG_INTC_ENABLE);
 	irq_gc_unlock_irqrestore(gc, flags);
@@ -269,6 +281,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
 	gc->private = priv;
 	gc->reg_base = base;
 	gc->domain = domain;
+	gc->suspend = liointc_suspend;
 	gc->resume = liointc_resume;
 
 	ct = gc->chip_types;
...
@@ -163,8 +163,8 @@ static int pch_msi_init_domains(struct pch_msi_data *priv,
 {
 	struct irq_domain *middle_domain, *msi_domain;
 
-	middle_domain = irq_domain_create_linear(domain_handle,
-						 priv->num_irqs,
-						 &pch_msi_middle_domain_ops,
-						 priv);
+	middle_domain = irq_domain_create_hierarchy(parent, 0, priv->num_irqs,
+						    domain_handle,
+						    &pch_msi_middle_domain_ops,
+						    priv);
 	if (!middle_domain) {
@@ -172,7 +172,6 @@ static int pch_msi_init_domains(struct pch_msi_data *priv,
 		return -ENOMEM;
 	}
 
-	middle_domain->parent = parent;
 	irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS);
 
 	msi_domain = pci_msi_create_irq_domain(domain_handle,
...
@@ -221,6 +221,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev)
 	}
 
 	parent_domain = irq_find_host(irq_parent_dn);
+	of_node_put(irq_parent_dn);
 	if (!parent_domain) {
 		dev_err(&pdev->dev, "failed to find parent IRQ domain\n");
 		return -ENODEV;
...
@@ -161,7 +161,7 @@ static struct msi_domain_info odmi_msi_domain_info = {
 static int __init mvebu_odmi_init(struct device_node *node,
 				  struct device_node *parent)
 {
-	struct irq_domain *inner_domain, *plat_domain;
+	struct irq_domain *parent_domain, *inner_domain, *plat_domain;
 	int ret, i;
 
 	if (of_property_read_u32(node, "marvell,odmi-frames", &odmis_count))
@@ -197,16 +197,17 @@ static int __init mvebu_odmi_init(struct device_node *node,
 		}
 	}
 
-	inner_domain = irq_domain_create_linear(of_node_to_fwnode(node),
-						odmis_count * NODMIS_PER_FRAME,
-						&odmi_domain_ops, NULL);
+	parent_domain = irq_find_host(parent);
+
+	inner_domain = irq_domain_create_hierarchy(parent_domain, 0,
+						   odmis_count * NODMIS_PER_FRAME,
+						   of_node_to_fwnode(node),
+						   &odmi_domain_ops, NULL);
 	if (!inner_domain) {
 		ret = -ENOMEM;
 		goto err_unmap;
 	}
 
-	inner_domain->parent = irq_find_host(parent);
-
 	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
 						     &odmi_msi_domain_info,
 						     inner_domain);
...
@@ -236,6 +236,7 @@ static int ti_sci_intr_irq_domain_probe(struct platform_device *pdev)
 	}
 
 	parent_domain = irq_find_host(parent_node);
+	of_node_put(parent_node);
 	if (!parent_domain) {
 		dev_err(dev, "Failed to find IRQ parent domain\n");
 		return -ENODEV;
...
@@ -38,8 +38,10 @@ int platform_irqchip_probe(struct platform_device *pdev)
 	struct device_node *par_np = of_irq_find_parent(np);
 	of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev);
 
-	if (!irq_init_cb)
+	if (!irq_init_cb) {
+		of_node_put(par_np);
 		return -EINVAL;
+	}
 
 	if (par_np == np)
 		par_np = NULL;
@@ -52,8 +54,10 @@ int platform_irqchip_probe(struct platform_device *pdev)
 	 * interrupt controller. The actual initialization callback of this
 	 * interrupt controller can check for specific domains as necessary.
 	 */
-	if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY))
+	if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) {
+		of_node_put(par_np);
 		return -EPROBE_DEFER;
+	}
 
 	return irq_init_cb(np, par_np);
 }
...
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2016 Thomas Gleixner.
* Copyright (C) 2016-2017 Christoph Hellwig.
*/
#ifndef __LINUX_GROUP_CPUS_H
#define __LINUX_GROUP_CPUS_H
#include <linux/kernel.h>
#include <linux/cpu.h>
struct cpumask *group_cpus_evenly(unsigned int numgrps);
#endif
@@ -1266,6 +1266,9 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest);
 int ipi_send_single(unsigned int virq, unsigned int cpu);
 int ipi_send_mask(unsigned int virq, const struct cpumask *dest);
 
+void ipi_mux_process(void);
+int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu));
+
 #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
 /*
  * Registers a generic IRQ handling function as the top-level IRQ handler in
...
@@ -125,6 +125,8 @@ struct irq_domain_chip_generic;
  *		core code.
  * @flags:	Per irq_domain flags
  * @mapcount:	The number of mapped interrupts
+ * @mutex:	Domain lock, hierarchical domains use root domain's lock
+ * @root:	Pointer to root domain, or containing structure if non-hierarchical
  *
  * Optional elements:
  * @fwnode:	Pointer to firmware node associated with the irq_domain. Pretty easy
@@ -143,7 +145,6 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by the irq domain code:
  * @revmap_size:	Size of the linear map table @revmap[]
  * @revmap_tree:	Radix map tree for hwirqs that don't fit in the linear map
- * @revmap_mutex:	Lock for the revmap
  * @revmap:		Linear table of irq_data pointers
  */
 struct irq_domain {
@@ -153,6 +154,8 @@ struct irq_domain {
 	void			*host_data;
 	unsigned int		flags;
 	unsigned int		mapcount;
+	struct mutex		mutex;
+	struct irq_domain	*root;
 
 	/* Optional data */
 	struct fwnode_handle	*fwnode;
@@ -171,7 +174,6 @@ struct irq_domain {
 	irq_hw_number_t		hwirq_max;
 	unsigned int		revmap_size;
 	struct radix_tree_root	revmap_tree;
-	struct mutex		revmap_mutex;
 	struct irq_data __rcu	*revmap[];
 };
...
@@ -86,6 +86,11 @@ config GENERIC_IRQ_IPI
 	depends on SMP
 	select IRQ_DOMAIN_HIERARCHY
 
+# Generic IRQ IPI Mux support
+config GENERIC_IRQ_IPI_MUX
+	bool
+	depends on SMP
+
 # Generic MSI hierarchical interrupt domain support
 config GENERIC_MSI_IRQ
 	bool
...
@@ -15,6 +15,7 @@ obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
 obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
 obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
+obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o
 obj-$(CONFIG_SMP) += affinity.o
 obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
 obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o
@@ -7,398 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
-#include <linux/sort.h>
+#include <linux/group_cpus.h>

static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
unsigned int cpus_per_vec)
{
const struct cpumask *siblmsk;
int cpu, sibl;
for ( ; cpus_per_vec > 0; ) {
cpu = cpumask_first(nmsk);
/* Should not happen, but I'm too lazy to think about it */
if (cpu >= nr_cpu_ids)
return;
cpumask_clear_cpu(cpu, nmsk);
cpumask_set_cpu(cpu, irqmsk);
cpus_per_vec--;
/* If the cpu has siblings, use them first */
siblmsk = topology_sibling_cpumask(cpu);
for (sibl = -1; cpus_per_vec > 0; ) {
sibl = cpumask_next(sibl, siblmsk);
if (sibl >= nr_cpu_ids)
break;
if (!cpumask_test_and_clear_cpu(sibl, nmsk))
continue;
cpumask_set_cpu(sibl, irqmsk);
cpus_per_vec--;
}
}
}
static cpumask_var_t *alloc_node_to_cpumask(void)
{
cpumask_var_t *masks;
int node;
masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
if (!masks)
return NULL;
for (node = 0; node < nr_node_ids; node++) {
if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
goto out_unwind;
}
return masks;
out_unwind:
while (--node >= 0)
free_cpumask_var(masks[node]);
kfree(masks);
return NULL;
}
static void free_node_to_cpumask(cpumask_var_t *masks)
{
int node;
for (node = 0; node < nr_node_ids; node++)
free_cpumask_var(masks[node]);
kfree(masks);
}
static void build_node_to_cpumask(cpumask_var_t *masks)
{
int cpu;
for_each_possible_cpu(cpu)
cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}
static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
const struct cpumask *mask, nodemask_t *nodemsk)
{
int n, nodes = 0;
/* Calculate the number of nodes in the supplied affinity mask */
for_each_node(n) {
if (cpumask_intersects(mask, node_to_cpumask[n])) {
node_set(n, *nodemsk);
nodes++;
}
}
return nodes;
}
struct node_vectors {
unsigned id;
union {
unsigned nvectors;
unsigned ncpus;
};
};
static int ncpus_cmp_func(const void *l, const void *r)
{
const struct node_vectors *ln = l;
const struct node_vectors *rn = r;
return ln->ncpus - rn->ncpus;
}
/*
* Allocate vector number for each node, so that for each node:
*
* 1) the allocated number is >= 1
*
 * 2) the allocated number is <= active CPU number of this node
*
* The actual allocated total vectors may be less than @numvecs when
* active total CPU number is less than @numvecs.
*
* Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
* for each node.
*/
static void alloc_nodes_vectors(unsigned int numvecs,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
const nodemask_t nodemsk,
struct cpumask *nmsk,
struct node_vectors *node_vectors)
{
unsigned n, remaining_ncpus = 0;
for (n = 0; n < nr_node_ids; n++) {
node_vectors[n].id = n;
node_vectors[n].ncpus = UINT_MAX;
}
for_each_node_mask(n, nodemsk) {
unsigned ncpus;
cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
ncpus = cpumask_weight(nmsk);
if (!ncpus)
continue;
remaining_ncpus += ncpus;
node_vectors[n].ncpus = ncpus;
}
numvecs = min_t(unsigned, remaining_ncpus, numvecs);
sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
ncpus_cmp_func, NULL);
/*
* Allocate vectors for each node according to the ratio of this
* node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is
* bigger than number of active numa nodes. Always start the
* allocation from the node with minimized nr_cpus.
*
* This way guarantees that each active node gets allocated at
* least one vector, and the theory is simple: over-allocation
* is only done when this node is assigned by one vector, so
* other nodes will be allocated >= 1 vector, since 'numvecs' is
* bigger than number of numa nodes.
*
* One perfect invariant is that number of allocated vectors for
* each node is <= CPU count of this node:
*
* 1) suppose there are two nodes: A and B
* ncpu(X) is CPU count of node X
* vecs(X) is the vector count allocated to node X via this
* algorithm
*
* ncpu(A) <= ncpu(B)
* ncpu(A) + ncpu(B) = N
* vecs(A) + vecs(B) = V
*
* vecs(A) = max(1, round_down(V * ncpu(A) / N))
* vecs(B) = V - vecs(A)
*
* both N and V are integer, and 2 <= V <= N, suppose
* V = N - delta, and 0 <= delta <= N - 2
*
* 2) obviously vecs(A) <= ncpu(A) because:
*
* if vecs(A) is 1, then vecs(A) <= ncpu(A) given
* ncpu(A) >= 1
*
* otherwise,
* vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
*
* 3) prove how vecs(B) <= ncpu(B):
*
* if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
* over-allocated, so vecs(B) <= ncpu(B),
*
* otherwise:
*
* vecs(A) =
* round_down(V * ncpu(A) / N) =
* round_down((N - delta) * ncpu(A) / N) =
* round_down((N * ncpu(A) - delta * ncpu(A)) / N) >=
* round_down((N * ncpu(A) - delta * N) / N) =
* cpu(A) - delta
*
* then:
*
* vecs(A) - V >= ncpu(A) - delta - V
* =>
* V - vecs(A) <= V + delta - ncpu(A)
* =>
* vecs(B) <= N - ncpu(A)
* =>
* vecs(B) <= cpu(B)
*
* For nodes >= 3, it can be thought as one node and another big
* node given that is exactly what this algorithm is implemented,
* and we always re-calculate 'remaining_ncpus' & 'numvecs', and
* finally for each node X: vecs(X) <= ncpu(X).
*
*/
for (n = 0; n < nr_node_ids; n++) {
unsigned nvectors, ncpus;
if (node_vectors[n].ncpus == UINT_MAX)
continue;
WARN_ON_ONCE(numvecs == 0);
ncpus = node_vectors[n].ncpus;
nvectors = max_t(unsigned, 1,
numvecs * ncpus / remaining_ncpus);
WARN_ON_ONCE(nvectors > ncpus);
node_vectors[n].nvectors = nvectors;
remaining_ncpus -= ncpus;
numvecs -= nvectors;
}
}
static int __irq_build_affinity_masks(unsigned int startvec,
unsigned int numvecs,
unsigned int firstvec,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
struct cpumask *nmsk,
struct irq_affinity_desc *masks)
{
unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
unsigned int last_affv = firstvec + numvecs;
unsigned int curvec = startvec;
nodemask_t nodemsk = NODE_MASK_NONE;
struct node_vectors *node_vectors;
if (cpumask_empty(cpu_mask))
return 0;
nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
/*
* If the number of nodes in the mask is greater than or equal the
* number of vectors we just spread the vectors across the nodes.
*/
if (numvecs <= nodes) {
for_each_node_mask(n, nodemsk) {
/* Ensure that only CPUs which are in both masks are set */
cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
if (++curvec == last_affv)
curvec = firstvec;
}
return numvecs;
}
node_vectors = kcalloc(nr_node_ids,
sizeof(struct node_vectors),
GFP_KERNEL);
if (!node_vectors)
return -ENOMEM;
/* allocate vector number for each node */
alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
nodemsk, nmsk, node_vectors);
for (i = 0; i < nr_node_ids; i++) {
unsigned int ncpus, v;
struct node_vectors *nv = &node_vectors[i];
if (nv->nvectors == UINT_MAX)
continue;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
ncpus = cpumask_weight(nmsk);
if (!ncpus)
continue;
WARN_ON_ONCE(nv->nvectors > ncpus);
/* Account for rounding errors */
extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);
/* Spread allocated vectors on CPUs of the current node */
for (v = 0; v < nv->nvectors; v++, curvec++) {
cpus_per_vec = ncpus / nv->nvectors;
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
--extra_vecs;
}
/*
* wrapping has to be considered given 'startvec'
* may start anywhere
*/
if (curvec >= last_affv)
curvec = firstvec;
irq_spread_init_one(&masks[curvec].mask, nmsk,
cpus_per_vec);
}
done += nv->nvectors;
}
kfree(node_vectors);
return done;
}
/*
* build affinity in two stages:
* 1) spread present CPU on these vectors
* 2) spread other possible CPUs on these vectors
*/
static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
unsigned int firstvec,
struct irq_affinity_desc *masks)
{
unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
cpumask_var_t *node_to_cpumask;
cpumask_var_t nmsk, npresmsk;
int ret = -ENOMEM;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return ret;
if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
goto fail_nmsk;
node_to_cpumask = alloc_node_to_cpumask();
if (!node_to_cpumask)
goto fail_npresmsk;
/* Stabilize the cpumasks */
cpus_read_lock();
build_node_to_cpumask(node_to_cpumask);
/* Spread on present CPUs starting from affd->pre_vectors */
ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
node_to_cpumask, cpu_present_mask,
nmsk, masks);
if (ret < 0)
goto fail_build_affinity;
nr_present = ret;
/*
* Spread on non present CPUs starting from the next vector to be
* handled. If the spreading of present CPUs already exhausted the
* vector space, assign the non present CPUs to the already spread
* out vectors.
*/
if (nr_present >= numvecs)
curvec = firstvec;
else
curvec = firstvec + nr_present;
cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
node_to_cpumask, npresmsk, nmsk,
masks);
if (ret >= 0)
nr_others = ret;
fail_build_affinity:
cpus_read_unlock();
if (ret >= 0)
WARN_ON(nr_present + nr_others < numvecs);
free_node_to_cpumask(node_to_cpumask);
fail_npresmsk:
free_cpumask_var(npresmsk);
fail_nmsk:
free_cpumask_var(nmsk);
return ret < 0 ? ret : 0;
}
 static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
 {
@@ -461,14 +70,18 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 	 */
 	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
 		unsigned int this_vecs = affd->set_size[i];
-		int ret;
+		int j;
+		struct cpumask *result = group_cpus_evenly(this_vecs);
 
-		ret = irq_build_affinity_masks(curvec, this_vecs,
-					       curvec, masks);
-		if (ret) {
+		if (!result) {
 			kfree(masks);
 			return NULL;
 		}
+
+		for (j = 0; j < this_vecs; j++)
+			cpumask_copy(&masks[curvec + j].mask, &result[j]);
+		kfree(result);
+
 		curvec += this_vecs;
 		usedvecs += this_vecs;
 	}
...
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Multiplex several virtual IPIs over a single HW IPI.
*
* Copyright The Asahi Linux Contributors
* Copyright (c) 2022 Ventana Micro Systems Inc.
*/
#define pr_fmt(fmt) "ipi-mux: " fmt
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/smp.h>
struct ipi_mux_cpu {
atomic_t enable;
atomic_t bits;
};
static struct ipi_mux_cpu __percpu *ipi_mux_pcpu;
static struct irq_domain *ipi_mux_domain;
static void (*ipi_mux_send)(unsigned int cpu);
static void ipi_mux_mask(struct irq_data *d)
{
struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
atomic_andnot(BIT(irqd_to_hwirq(d)), &icpu->enable);
}
static void ipi_mux_unmask(struct irq_data *d)
{
struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
u32 ibit = BIT(irqd_to_hwirq(d));
atomic_or(ibit, &icpu->enable);
/*
* The atomic_or() above must complete before the atomic_read()
* below to avoid racing ipi_mux_send_mask().
*/
smp_mb__after_atomic();
/* If a pending IPI was unmasked, raise a parent IPI immediately. */
if (atomic_read(&icpu->bits) & ibit)
ipi_mux_send(smp_processor_id());
}
static void ipi_mux_send_mask(struct irq_data *d, const struct cpumask *mask)
{
struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
u32 ibit = BIT(irqd_to_hwirq(d));
unsigned long pending;
int cpu;
for_each_cpu(cpu, mask) {
icpu = per_cpu_ptr(ipi_mux_pcpu, cpu);
/*
* This sequence is the mirror of the one in ipi_mux_unmask();
* see the comment there. Additionally, release semantics
* ensure that the vIPI flag set is ordered after any shared
* memory accesses that precede it. This therefore also pairs
* with the atomic_fetch_andnot in ipi_mux_process().
*/
pending = atomic_fetch_or_release(ibit, &icpu->bits);
/*
* The atomic_fetch_or_release() above must complete
* before the atomic_read() below to avoid racing with
* ipi_mux_unmask().
*/
smp_mb__after_atomic();
/*
* The flag writes must complete before the physical IPI is
* issued to another CPU. This is implied by the control
* dependency on the result of atomic_read() below, which is
* itself already ordered after the vIPI flag write.
*/
if (!(pending & ibit) && (atomic_read(&icpu->enable) & ibit))
ipi_mux_send(cpu);
}
}
static const struct irq_chip ipi_mux_chip = {
.name = "IPI Mux",
.irq_mask = ipi_mux_mask,
.irq_unmask = ipi_mux_unmask,
.ipi_send_mask = ipi_mux_send_mask,
};
static int ipi_mux_domain_alloc(struct irq_domain *d, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
int i;
for (i = 0; i < nr_irqs; i++) {
irq_set_percpu_devid(virq + i);
irq_domain_set_info(d, virq + i, i, &ipi_mux_chip, NULL,
handle_percpu_devid_irq, NULL, NULL);
}
return 0;
}
static const struct irq_domain_ops ipi_mux_domain_ops = {
.alloc = ipi_mux_domain_alloc,
.free = irq_domain_free_irqs_top,
};
/**
* ipi_mux_process - Process multiplexed virtual IPIs
*/
void ipi_mux_process(void)
{
struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
irq_hw_number_t hwirq;
unsigned long ipis;
unsigned int en;
/*
* Reading enable mask does not need to be ordered as long as
* this function is called from interrupt handler because only
 * the CPU itself can change its own enable mask.
*/
en = atomic_read(&icpu->enable);
/*
* Clear the IPIs we are about to handle. This pairs with the
* atomic_fetch_or_release() in ipi_mux_send_mask().
*/
ipis = atomic_fetch_andnot(en, &icpu->bits) & en;
for_each_set_bit(hwirq, &ipis, BITS_PER_TYPE(int))
generic_handle_domain_irq(ipi_mux_domain, hwirq);
}
/**
* ipi_mux_create - Create virtual IPIs multiplexed on top of a single
* parent IPI.
* @nr_ipi: number of virtual IPIs to create. This should
* be <= BITS_PER_TYPE(int)
* @mux_send: callback to trigger parent IPI for a particular CPU
*
* Returns first virq of the newly created virtual IPIs upon success
* or <=0 upon failure
*/
int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu))
{
struct fwnode_handle *fwnode;
struct irq_domain *domain;
int rc;
if (ipi_mux_domain)
return -EEXIST;
if (BITS_PER_TYPE(int) < nr_ipi || !mux_send)
return -EINVAL;
ipi_mux_pcpu = alloc_percpu(typeof(*ipi_mux_pcpu));
if (!ipi_mux_pcpu)
return -ENOMEM;
fwnode = irq_domain_alloc_named_fwnode("IPI-Mux");
if (!fwnode) {
pr_err("unable to create IPI Mux fwnode\n");
rc = -ENOMEM;
goto fail_free_cpu;
}
domain = irq_domain_create_linear(fwnode, nr_ipi,
&ipi_mux_domain_ops, NULL);
if (!domain) {
pr_err("unable to add IPI Mux domain\n");
rc = -ENOMEM;
goto fail_free_fwnode;
}
domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
irq_domain_update_bus_token(domain, DOMAIN_BUS_IPI);
rc = irq_domain_alloc_irqs(domain, nr_ipi, NUMA_NO_NODE, NULL);
if (rc <= 0) {
pr_err("unable to alloc IRQs from IPI Mux domain\n");
goto fail_free_domain;
}
ipi_mux_domain = domain;
ipi_mux_send = mux_send;
return rc;
fail_free_domain:
irq_domain_remove(domain);
fail_free_fwnode:
irq_domain_free_fwnode(fwnode);
fail_free_cpu:
free_percpu(ipi_mux_pcpu);
return rc;
}
@@ -25,6 +25,9 @@ static DEFINE_MUTEX(irq_domain_mutex);
 
 static struct irq_domain *irq_default_domain;
 
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+					unsigned int nr_irqs, int node, void *arg,
+					bool realloc, const struct irq_affinity_desc *affinity);
 static void irq_domain_check_hierarchy(struct irq_domain *domain);
 
 struct irqchip_fwid {
@@ -123,21 +126,10 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
 }
 EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
 
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- *              direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
-				    irq_hw_number_t hwirq_max, int direct_max,
-				    const struct irq_domain_ops *ops,
-				    void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+					      unsigned int size,
+					      irq_hw_number_t hwirq_max,
+					      int direct_max,
+					      const struct irq_domain_ops *ops,
+					      void *host_data)
 {
@@ -214,25 +206,66 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
 	/* Fill structure */
 	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
-	mutex_init(&domain->revmap_mutex);
 	domain->ops = ops;
 	domain->host_data = host_data;
 	domain->hwirq_max = hwirq_max;
 
-	if (direct_max) {
+	if (direct_max)
 		domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
-	}
 
 	domain->revmap_size = size;
 
+	/*
+	 * Hierarchical domains use the domain lock of the root domain
+	 * (innermost domain).
+	 *
+	 * For non-hierarchical domains (as for root domains), the root
+	 * pointer is set to the domain itself so that &domain->root->mutex
+	 * always points to the right lock.
+	 */
+	mutex_init(&domain->mutex);
+	domain->root = domain;
+
 	irq_domain_check_hierarchy(domain);
 
+	return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
 	mutex_lock(&irq_domain_mutex);
 	debugfs_add_domain_dir(domain);
 	list_add(&domain->link, &irq_domain_list);
 	mutex_unlock(&irq_domain_mutex);
 
 	pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ *              direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+				    irq_hw_number_t hwirq_max, int direct_max,
+				    const struct irq_domain_ops *ops,
+				    void *host_data)
+{
+	struct irq_domain *domain;
+
+	domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+				     ops, host_data);
+	if (domain)
+		__irq_domain_publish(domain);
+
 	return domain;
 }
 EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -502,30 +535,34 @@ static bool irq_domain_is_nomap(struct irq_domain *domain)
 static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
+	lockdep_assert_held(&domain->root->mutex);
+
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	mutex_lock(&domain->revmap_mutex);
 	if (hwirq < domain->revmap_size)
 		rcu_assign_pointer(domain->revmap[hwirq], NULL);
 	else
 		radix_tree_delete(&domain->revmap_tree, hwirq);
-	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_set_mapping(struct irq_domain *domain,
 				   irq_hw_number_t hwirq,
 				   struct irq_data *irq_data)
 {
+	/*
+	 * This also makes sure that all domains point to the same root when
+	 * called from irq_domain_insert_irq() for each domain in a hierarchy.
+	 */
+	lockdep_assert_held(&domain->root->mutex);
+
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	mutex_lock(&domain->revmap_mutex);
 	if (hwirq < domain->revmap_size)
 		rcu_assign_pointer(domain->revmap[hwirq], irq_data);
 	else
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
-	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
...@@ -538,6 +575,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) ...@@ -538,6 +575,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
return; return;
hwirq = irq_data->hwirq; hwirq = irq_data->hwirq;
mutex_lock(&domain->root->mutex);
irq_set_status_flags(irq, IRQ_NOREQUEST); irq_set_status_flags(irq, IRQ_NOREQUEST);
/* remove chip and handler */ /* remove chip and handler */
...@@ -557,9 +597,11 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) ...@@ -557,9 +597,11 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
/* Clear reverse map for this hwirq */ /* Clear reverse map for this hwirq */
irq_domain_clear_mapping(domain, hwirq); irq_domain_clear_mapping(domain, hwirq);
mutex_unlock(&domain->root->mutex);
} }
int irq_domain_associate(struct irq_domain *domain, unsigned int virq, static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq) irq_hw_number_t hwirq)
{ {
struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_data *irq_data = irq_get_irq_data(virq);
...@@ -573,7 +615,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, ...@@ -573,7 +615,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
return -EINVAL; return -EINVAL;
mutex_lock(&irq_domain_mutex);
irq_data->hwirq = hwirq; irq_data->hwirq = hwirq;
irq_data->domain = domain; irq_data->domain = domain;
if (domain->ops->map) { if (domain->ops->map) {
...@@ -590,23 +631,29 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, ...@@ -590,23 +631,29 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
} }
irq_data->domain = NULL; irq_data->domain = NULL;
irq_data->hwirq = 0; irq_data->hwirq = 0;
mutex_unlock(&irq_domain_mutex);
return ret; return ret;
} }
/* If not already assigned, give the domain the chip's name */
if (!domain->name && irq_data->chip)
domain->name = irq_data->chip->name;
} }
domain->mapcount++; domain->mapcount++;
irq_domain_set_mapping(domain, hwirq, irq_data); irq_domain_set_mapping(domain, hwirq, irq_data);
mutex_unlock(&irq_domain_mutex);
irq_clear_status_flags(virq, IRQ_NOREQUEST); irq_clear_status_flags(virq, IRQ_NOREQUEST);
return 0; return 0;
} }
int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq)
{
int ret;
mutex_lock(&domain->root->mutex);
ret = irq_domain_associate_locked(domain, virq, hwirq);
mutex_unlock(&domain->root->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(irq_domain_associate); EXPORT_SYMBOL_GPL(irq_domain_associate);
void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
...@@ -619,9 +666,8 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, ...@@ -619,9 +666,8 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__,
of_node_full_name(of_node), irq_base, (int)hwirq_base, count); of_node_full_name(of_node), irq_base, (int)hwirq_base, count);
for (i = 0; i < count; i++) { for (i = 0; i < count; i++)
irq_domain_associate(domain, irq_base + i, hwirq_base + i); irq_domain_associate(domain, irq_base + i, hwirq_base + i);
}
} }
EXPORT_SYMBOL_GPL(irq_domain_associate_many); EXPORT_SYMBOL_GPL(irq_domain_associate_many);
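A hedged sketch of a direct caller (hypothetical numbers; 'd' is assumed to be a previously created domain): legacy-style setups pre-associate a contiguous range so that the virq/hwirq mapping is fixed up front:

	/* virqs 16..31 map 1:1 onto hwirqs 0..15 of domain 'd' */
	irq_domain_associate_many(d, 16, 0, 16);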
...@@ -668,6 +714,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) ...@@ -668,6 +714,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
EXPORT_SYMBOL_GPL(irq_create_direct_mapping); EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
#endif #endif
static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain,
irq_hw_number_t hwirq,
const struct irq_affinity_desc *affinity)
{
struct device_node *of_node = irq_domain_get_of_node(domain);
int virq;
pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
/* Allocate a virtual interrupt number */
virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
affinity);
if (virq <= 0) {
pr_debug("-> virq allocation failed\n");
return 0;
}
if (irq_domain_associate_locked(domain, virq, hwirq)) {
irq_free_desc(virq);
return 0;
}
pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
hwirq, of_node_full_name(of_node), virq);
return virq;
}
/** /**
* irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
* @domain: domain owning this hardware interrupt or NULL for default domain * @domain: domain owning this hardware interrupt or NULL for default domain
...@@ -683,11 +757,8 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, ...@@ -683,11 +757,8 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
irq_hw_number_t hwirq, irq_hw_number_t hwirq,
const struct irq_affinity_desc *affinity) const struct irq_affinity_desc *affinity)
{ {
struct device_node *of_node;
int virq; int virq;
pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
/* Look for default domain if necessary */ /* Look for default domain if necessary */
if (domain == NULL) if (domain == NULL)
domain = irq_default_domain; domain = irq_default_domain;
...@@ -695,32 +766,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, ...@@ -695,32 +766,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
return 0; return 0;
} }
pr_debug("-> using domain @%p\n", domain);
of_node = irq_domain_get_of_node(domain); mutex_lock(&domain->root->mutex);
/* Check if mapping already exists */ /* Check if mapping already exists */
virq = irq_find_mapping(domain, hwirq); virq = irq_find_mapping(domain, hwirq);
if (virq) { if (virq) {
pr_debug("-> existing mapping on virq %d\n", virq); pr_debug("existing mapping on virq %d\n", virq);
return virq; goto out;
} }
/* Allocate a virtual interrupt number */ virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity);
virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), out:
affinity); mutex_unlock(&domain->root->mutex);
if (virq <= 0) {
pr_debug("-> virq allocation failed\n");
return 0;
}
if (irq_domain_associate(domain, virq, hwirq)) {
irq_free_desc(virq);
return 0;
}
pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
hwirq, of_node_full_name(of_node), virq);
return virq; return virq;
} }
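A hedged usage sketch (hypothetical names; 'd' is an existing domain): since the lookup and the creation now run under the same domain->root->mutex, concurrent callers asking for the same hwirq end up with the same Linux IRQ number:

	static unsigned int demo_map_hwirq(struct irq_domain *d, irq_hw_number_t hwirq)
	{
		unsigned int virq = irq_create_mapping(d, hwirq);

		if (!virq)
			pr_err("failed to map hwirq %lu\n", hwirq);
		/* a racing irq_create_mapping(d, hwirq) returns this same virq */
		return virq;
	}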
...@@ -789,6 +847,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) ...@@ -789,6 +847,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
type &= IRQ_TYPE_SENSE_MASK; type &= IRQ_TYPE_SENSE_MASK;
mutex_lock(&domain->root->mutex);
/* /*
* If we've already configured this interrupt, * If we've already configured this interrupt,
* don't do it again, or hell will break loose. * don't do it again, or hell will break loose.
...@@ -801,7 +861,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) ...@@ -801,7 +861,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* interrupt number. * interrupt number.
*/ */
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
return virq; goto out;
/* /*
* If the trigger type has not been set yet, then set * If the trigger type has not been set yet, then set
...@@ -809,40 +869,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) ...@@ -809,40 +869,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
*/ */
if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
irq_data = irq_get_irq_data(virq); irq_data = irq_get_irq_data(virq);
if (!irq_data) if (!irq_data) {
return 0; virq = 0;
goto out;
}
irqd_set_trigger_type(irq_data, type); irqd_set_trigger_type(irq_data, type);
return virq; goto out;
} }
pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
return 0; virq = 0;
goto out;
} }
if (irq_domain_is_hierarchy(domain)) { if (irq_domain_is_hierarchy(domain)) {
virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
if (virq <= 0) fwspec, false, NULL);
return 0; if (virq <= 0) {
virq = 0;
goto out;
}
} else { } else {
/* Create mapping */ /* Create mapping */
virq = irq_create_mapping(domain, hwirq); virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL);
if (!virq) if (!virq)
return virq; goto out;
} }
irq_data = irq_get_irq_data(virq); irq_data = irq_get_irq_data(virq);
if (!irq_data) { if (WARN_ON(!irq_data)) {
if (irq_domain_is_hierarchy(domain)) virq = 0;
irq_domain_free_irqs(virq, 1); goto out;
else
irq_dispose_mapping(virq);
return 0;
} }
/* Store trigger type */ /* Store trigger type */
irqd_set_trigger_type(irq_data, type); irqd_set_trigger_type(irq_data, type);
out:
mutex_unlock(&domain->root->mutex);
return virq; return virq;
} }
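A hedged caller sketch (values invented): building an irq_fwspec by hand and letting irq_create_fwspec_mapping() handle the existing-mapping check, the allocation and the trigger configuration, all of which now happen under the root domain's mutex:

	static unsigned int demo_map_fwspec(struct fwnode_handle *fwnode)
	{
		struct irq_fwspec fwspec = {
			.fwnode		= fwnode,
			.param_count	= 2,
			.param		= { 5, IRQ_TYPE_LEVEL_HIGH },
		};

		return irq_create_fwspec_mapping(&fwspec);
	}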
...@@ -1102,12 +1167,16 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, ...@@ -1102,12 +1167,16 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain; struct irq_domain *domain;
if (size) if (size)
domain = irq_domain_create_linear(fwnode, size, ops, host_data); domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else else
domain = irq_domain_create_tree(fwnode, ops, host_data); domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
if (domain) { if (domain) {
domain->root = parent->root;
domain->parent = parent; domain->parent = parent;
domain->flags |= flags; domain->flags |= flags;
__irq_domain_publish(domain);
} }
return domain; return domain;
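A hedged sketch of the hierarchy path (hypothetical names; demo_msi_ops is assumed to provide real .alloc/.free callbacks): the child created here shares parent->root, so &child->root->mutex resolves to the root controller's lock at every level of the stack:

	static const struct irq_domain_ops demo_msi_ops;	/* assumed: .alloc/.free/.translate */

	static struct irq_domain *demo_add_msi_child(struct irq_domain *parent,
						     struct fwnode_handle *fwnode,
						     void *priv)
	{
		/* size == 0: tree revmap; the child inherits parent->root */
		return irq_domain_create_hierarchy(parent, 0, 0, fwnode,
						   &demo_msi_ops, priv);
	}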
...@@ -1123,10 +1192,6 @@ static void irq_domain_insert_irq(int virq) ...@@ -1123,10 +1192,6 @@ static void irq_domain_insert_irq(int virq)
domain->mapcount++; domain->mapcount++;
irq_domain_set_mapping(domain, data->hwirq, data); irq_domain_set_mapping(domain, data->hwirq, data);
/* If not already assigned, give the domain the chip's name */
if (!domain->name && data->chip)
domain->name = data->chip->name;
} }
irq_clear_status_flags(virq, IRQ_NOREQUEST); irq_clear_status_flags(virq, IRQ_NOREQUEST);
...@@ -1426,40 +1491,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, ...@@ -1426,40 +1491,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg); return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
} }
/** static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
* __irq_domain_alloc_irqs - Allocate IRQs from domain
* @domain: domain to allocate from
* @irq_base: allocate specified IRQ number if irq_base >= 0
* @nr_irqs: number of IRQs to allocate
* @node: NUMA node id for memory allocation
* @arg: domain specific argument
* @realloc: IRQ descriptors have already been allocated if true
* @affinity: Optional irq affinity mask for multiqueue devices
*
* Allocate IRQ numbers and initialized all data structures to support
* hierarchy IRQ domains.
* Parameter @realloc is mainly to support legacy IRQs.
* Returns error code or allocated IRQ number
*
* The whole process to setup an IRQ has been split into two steps.
* The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
* descriptor and required hardware resources. The second step,
* irq_domain_activate_irq(), is to program the hardware with preallocated
* resources. In this way, it's easier to rollback when failing to
* allocate resources.
*/
int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg, unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity) bool realloc, const struct irq_affinity_desc *affinity)
{ {
int i, ret, virq; int i, ret, virq;
if (domain == NULL) {
domain = irq_default_domain;
if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
return -EINVAL;
}
if (realloc && irq_base >= 0) { if (realloc && irq_base >= 0) {
virq = irq_base; virq = irq_base;
} else { } else {
...@@ -1478,24 +1515,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, ...@@ -1478,24 +1515,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc; goto out_free_desc;
} }
mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
if (ret < 0) { if (ret < 0)
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data; goto out_free_irq_data;
}
for (i = 0; i < nr_irqs; i++) { for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i); ret = irq_domain_trim_hierarchy(virq + i);
if (ret) { if (ret)
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data; goto out_free_irq_data;
} }
}
for (i = 0; i < nr_irqs; i++) for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i); irq_domain_insert_irq(virq + i);
mutex_unlock(&irq_domain_mutex);
return virq; return virq;
...@@ -1505,6 +1536,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, ...@@ -1505,6 +1536,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs); irq_free_descs(virq, nr_irqs);
return ret; return ret;
} }
/**
* __irq_domain_alloc_irqs - Allocate IRQs from domain
* @domain: domain to allocate from
* @irq_base: allocate specified IRQ number if irq_base >= 0
* @nr_irqs: number of IRQs to allocate
* @node: NUMA node id for memory allocation
* @arg: domain specific argument
* @realloc: IRQ descriptors have already been allocated if true
* @affinity: Optional irq affinity mask for multiqueue devices
*
 * Allocate IRQ numbers and initialize all data structures to support
* hierarchy IRQ domains.
* Parameter @realloc is mainly to support legacy IRQs.
* Returns error code or allocated IRQ number
*
* The whole process to setup an IRQ has been split into two steps.
* The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
* descriptor and required hardware resources. The second step,
* irq_domain_activate_irq(), is to program the hardware with preallocated
* resources. In this way, it's easier to rollback when failing to
* allocate resources.
*/
int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity)
{
int ret;
if (domain == NULL) {
domain = irq_default_domain;
if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
return -EINVAL;
}
mutex_lock(&domain->root->mutex);
ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
realloc, affinity);
mutex_unlock(&domain->root->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
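A hedged sketch of the two-step setup described in the kernel-doc above (hypothetical names; 'd' and 'fwspec' are assumed to exist): allocate the descriptor and hardware resources first, then activate the preallocated resources:

	static int demo_alloc_and_activate(struct irq_domain *d, struct irq_fwspec *fwspec)
	{
		int virq, ret;

		/* step 1: allocate the irq descriptor and domain resources */
		virq = irq_domain_alloc_irqs(d, 1, NUMA_NO_NODE, fwspec);
		if (virq <= 0)
			return virq ? virq : -ENOSPC;

		/* step 2: program the preallocated resources into the hardware */
		ret = irq_domain_activate_irq(irq_get_irq_data(virq), false);
		if (ret)
			irq_domain_free_irqs(virq, 1);

		return ret ? ret : virq;
	}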
/* The irq_data was moved, fix the revmap to refer to the new location */ /* The irq_data was moved, fix the revmap to refer to the new location */
...@@ -1512,11 +1585,12 @@ static void irq_domain_fix_revmap(struct irq_data *d) ...@@ -1512,11 +1585,12 @@ static void irq_domain_fix_revmap(struct irq_data *d)
{ {
void __rcu **slot; void __rcu **slot;
lockdep_assert_held(&d->domain->root->mutex);
if (irq_domain_is_nomap(d->domain)) if (irq_domain_is_nomap(d->domain))
return; return;
/* Fix up the revmap. */ /* Fix up the revmap. */
mutex_lock(&d->domain->revmap_mutex);
if (d->hwirq < d->domain->revmap_size) { if (d->hwirq < d->domain->revmap_size) {
/* Not using radix tree */ /* Not using radix tree */
rcu_assign_pointer(d->domain->revmap[d->hwirq], d); rcu_assign_pointer(d->domain->revmap[d->hwirq], d);
...@@ -1525,7 +1599,6 @@ static void irq_domain_fix_revmap(struct irq_data *d) ...@@ -1525,7 +1599,6 @@ static void irq_domain_fix_revmap(struct irq_data *d)
if (slot) if (slot)
radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
} }
mutex_unlock(&d->domain->revmap_mutex);
} }
/** /**
...@@ -1541,8 +1614,8 @@ static void irq_domain_fix_revmap(struct irq_data *d) ...@@ -1541,8 +1614,8 @@ static void irq_domain_fix_revmap(struct irq_data *d)
*/ */
int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg)
{ {
struct irq_data *child_irq_data; struct irq_data *irq_data = irq_get_irq_data(virq);
struct irq_data *root_irq_data = irq_get_irq_data(virq); struct irq_data *parent_irq_data;
struct irq_desc *desc; struct irq_desc *desc;
int rv = 0; int rv = 0;
...@@ -1567,47 +1640,46 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) ...@@ -1567,47 +1640,46 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg)
if (WARN_ON(!irq_domain_is_hierarchy(domain))) if (WARN_ON(!irq_domain_is_hierarchy(domain)))
return -EINVAL; return -EINVAL;
if (!root_irq_data) if (!irq_data)
return -EINVAL; return -EINVAL;
if (domain->parent != root_irq_data->domain) if (domain->parent != irq_data->domain)
return -EINVAL; return -EINVAL;
child_irq_data = kzalloc_node(sizeof(*child_irq_data), GFP_KERNEL, parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL,
irq_data_get_node(root_irq_data)); irq_data_get_node(irq_data));
if (!child_irq_data) if (!parent_irq_data)
return -ENOMEM; return -ENOMEM;
mutex_lock(&irq_domain_mutex); mutex_lock(&domain->root->mutex);
/* Copy the original irq_data. */ /* Copy the original irq_data. */
*child_irq_data = *root_irq_data; *parent_irq_data = *irq_data;
/* /*
* Overwrite the root_irq_data, which is embedded in struct * Overwrite the irq_data, which is embedded in struct irq_desc, with
* irq_desc, with values for this domain. * values for this domain.
*/ */
root_irq_data->parent_data = child_irq_data; irq_data->parent_data = parent_irq_data;
root_irq_data->domain = domain; irq_data->domain = domain;
root_irq_data->mask = 0; irq_data->mask = 0;
root_irq_data->hwirq = 0; irq_data->hwirq = 0;
root_irq_data->chip = NULL; irq_data->chip = NULL;
root_irq_data->chip_data = NULL; irq_data->chip_data = NULL;
/* May (probably does) set hwirq, chip, etc. */ /* May (probably does) set hwirq, chip, etc. */
rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
if (rv) { if (rv) {
/* Restore the original irq_data. */ /* Restore the original irq_data. */
*root_irq_data = *child_irq_data; *irq_data = *parent_irq_data;
kfree(child_irq_data); kfree(parent_irq_data);
goto error; goto error;
} }
irq_domain_fix_revmap(child_irq_data); irq_domain_fix_revmap(parent_irq_data);
irq_domain_set_mapping(domain, root_irq_data->hwirq, root_irq_data); irq_domain_set_mapping(domain, irq_data->hwirq, irq_data);
error: error:
mutex_unlock(&irq_domain_mutex); mutex_unlock(&domain->root->mutex);
return rv; return rv;
} }
...@@ -1623,8 +1695,8 @@ EXPORT_SYMBOL_GPL(irq_domain_push_irq); ...@@ -1623,8 +1695,8 @@ EXPORT_SYMBOL_GPL(irq_domain_push_irq);
*/ */
int irq_domain_pop_irq(struct irq_domain *domain, int virq) int irq_domain_pop_irq(struct irq_domain *domain, int virq)
{ {
struct irq_data *root_irq_data = irq_get_irq_data(virq); struct irq_data *irq_data = irq_get_irq_data(virq);
struct irq_data *child_irq_data; struct irq_data *parent_irq_data;
struct irq_data *tmp_irq_data; struct irq_data *tmp_irq_data;
struct irq_desc *desc; struct irq_desc *desc;
...@@ -1646,37 +1718,37 @@ int irq_domain_pop_irq(struct irq_domain *domain, int virq) ...@@ -1646,37 +1718,37 @@ int irq_domain_pop_irq(struct irq_domain *domain, int virq)
if (domain == NULL) if (domain == NULL)
return -EINVAL; return -EINVAL;
if (!root_irq_data) if (!irq_data)
return -EINVAL; return -EINVAL;
tmp_irq_data = irq_domain_get_irq_data(domain, virq); tmp_irq_data = irq_domain_get_irq_data(domain, virq);
/* We can only "pop" if this domain is at the top of the list */ /* We can only "pop" if this domain is at the top of the list */
if (WARN_ON(root_irq_data != tmp_irq_data)) if (WARN_ON(irq_data != tmp_irq_data))
return -EINVAL; return -EINVAL;
if (WARN_ON(root_irq_data->domain != domain)) if (WARN_ON(irq_data->domain != domain))
return -EINVAL; return -EINVAL;
child_irq_data = root_irq_data->parent_data; parent_irq_data = irq_data->parent_data;
if (WARN_ON(!child_irq_data)) if (WARN_ON(!parent_irq_data))
return -EINVAL; return -EINVAL;
mutex_lock(&irq_domain_mutex); mutex_lock(&domain->root->mutex);
root_irq_data->parent_data = NULL; irq_data->parent_data = NULL;
irq_domain_clear_mapping(domain, root_irq_data->hwirq); irq_domain_clear_mapping(domain, irq_data->hwirq);
irq_domain_free_irqs_hierarchy(domain, virq, 1); irq_domain_free_irqs_hierarchy(domain, virq, 1);
/* Restore the original irq_data. */ /* Restore the original irq_data. */
*root_irq_data = *child_irq_data; *irq_data = *parent_irq_data;
irq_domain_fix_revmap(root_irq_data); irq_domain_fix_revmap(irq_data);
mutex_unlock(&irq_domain_mutex); mutex_unlock(&domain->root->mutex);
kfree(child_irq_data); kfree(parent_irq_data);
return 0; return 0;
} }
...@@ -1690,17 +1762,20 @@ EXPORT_SYMBOL_GPL(irq_domain_pop_irq); ...@@ -1690,17 +1762,20 @@ EXPORT_SYMBOL_GPL(irq_domain_pop_irq);
void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
{ {
struct irq_data *data = irq_get_irq_data(virq); struct irq_data *data = irq_get_irq_data(virq);
struct irq_domain *domain;
int i; int i;
if (WARN(!data || !data->domain || !data->domain->ops->free, if (WARN(!data || !data->domain || !data->domain->ops->free,
"NULL pointer, cannot free irq\n")) "NULL pointer, cannot free irq\n"))
return; return;
mutex_lock(&irq_domain_mutex); domain = data->domain;
mutex_lock(&domain->root->mutex);
for (i = 0; i < nr_irqs; i++) for (i = 0; i < nr_irqs; i++)
irq_domain_remove_irq(virq + i); irq_domain_remove_irq(virq + i);
irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs); irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs);
mutex_unlock(&irq_domain_mutex); mutex_unlock(&domain->root->mutex);
irq_domain_free_irq_data(virq, nr_irqs); irq_domain_free_irq_data(virq, nr_irqs);
irq_free_descs(virq, nr_irqs); irq_free_descs(virq, nr_irqs);
...@@ -1865,6 +1940,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, ...@@ -1865,6 +1940,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
irq_set_handler_data(virq, handler_data); irq_set_handler_data(virq, handler_data);
} }
static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity)
{
return -EINVAL;
}
static void irq_domain_check_hierarchy(struct irq_domain *domain) static void irq_domain_check_hierarchy(struct irq_domain *domain)
{ {
} }
......
...@@ -723,10 +723,13 @@ EXPORT_SYMBOL(disable_irq_nosync); ...@@ -723,10 +723,13 @@ EXPORT_SYMBOL(disable_irq_nosync);
* to complete before returning. If you use this function while * to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock. * holding a resource the IRQ handler may need you will deadlock.
* *
* This function may be called - with care - from IRQ context. * Can only be called from preemptible code as it might sleep when
* an interrupt thread is associated to @irq.
*
*/ */
void disable_irq(unsigned int irq) void disable_irq(unsigned int irq)
{ {
might_sleep();
if (!__disable_irq_nosync(irq)) if (!__disable_irq_nosync(irq))
synchronize_irq(irq); synchronize_irq(irq);
} }
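A hedged usage sketch (hypothetical demo_* names): the might_sleep() added here means disable_irq() belongs in contexts that are allowed to sleep, such as a workqueue; from hard-irq or other atomic context only the nosync variant is safe:

	static unsigned int demo_irq;	/* assumed: a previously requested interrupt */

	static void demo_reconfig_work(struct work_struct *work)
	{
		/* process context: may wait for a running handler or irq thread */
		disable_irq(demo_irq);
		/* ... reprogram the device safely ... */
		enable_irq(demo_irq);
	}

	static irqreturn_t demo_hardirq(int irq, void *dev_id)
	{
		/* atomic context: never call disable_irq() here, use nosync */
		disable_irq_nosync(irq);
		return IRQ_WAKE_THREAD;
	}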
......
...@@ -353,6 +353,8 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o ...@@ -353,6 +353,8 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o
obj-$(CONFIG_PARMAN) += parman.o obj-$(CONFIG_PARMAN) += parman.o
obj-y += group_cpus.o
# GCC library routines # GCC library routines
obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o
obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 Thomas Gleixner.
* Copyright (C) 2016-2017 Christoph Hellwig.
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sort.h>
#include <linux/group_cpus.h>
#ifdef CONFIG_SMP
static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
unsigned int cpus_per_grp)
{
const struct cpumask *siblmsk;
int cpu, sibl;
for ( ; cpus_per_grp > 0; ) {
cpu = cpumask_first(nmsk);
/* Should not happen, but I'm too lazy to think about it */
if (cpu >= nr_cpu_ids)
return;
cpumask_clear_cpu(cpu, nmsk);
cpumask_set_cpu(cpu, irqmsk);
cpus_per_grp--;
/* If the cpu has siblings, use them first */
siblmsk = topology_sibling_cpumask(cpu);
for (sibl = -1; cpus_per_grp > 0; ) {
sibl = cpumask_next(sibl, siblmsk);
if (sibl >= nr_cpu_ids)
break;
if (!cpumask_test_and_clear_cpu(sibl, nmsk))
continue;
cpumask_set_cpu(sibl, irqmsk);
cpus_per_grp--;
}
}
}
static cpumask_var_t *alloc_node_to_cpumask(void)
{
cpumask_var_t *masks;
int node;
masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
if (!masks)
return NULL;
for (node = 0; node < nr_node_ids; node++) {
if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
goto out_unwind;
}
return masks;
out_unwind:
while (--node >= 0)
free_cpumask_var(masks[node]);
kfree(masks);
return NULL;
}
static void free_node_to_cpumask(cpumask_var_t *masks)
{
int node;
for (node = 0; node < nr_node_ids; node++)
free_cpumask_var(masks[node]);
kfree(masks);
}
static void build_node_to_cpumask(cpumask_var_t *masks)
{
int cpu;
for_each_possible_cpu(cpu)
cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}
static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
const struct cpumask *mask, nodemask_t *nodemsk)
{
int n, nodes = 0;
/* Calculate the number of nodes in the supplied affinity mask */
for_each_node(n) {
if (cpumask_intersects(mask, node_to_cpumask[n])) {
node_set(n, *nodemsk);
nodes++;
}
}
return nodes;
}
struct node_groups {
unsigned id;
union {
unsigned ngroups;
unsigned ncpus;
};
};
static int ncpus_cmp_func(const void *l, const void *r)
{
const struct node_groups *ln = l;
const struct node_groups *rn = r;
return ln->ncpus - rn->ncpus;
}
/*
* Allocate group number for each node, so that for each node:
*
* 1) the allocated number is >= 1
*
* 2) the allocated number is <= active CPU number of this node
*
 * The total number of allocated groups may be less than @numgrps when
 * the number of active CPUs is less than @numgrps.
*
* Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
* for each node.
*/
static void alloc_nodes_groups(unsigned int numgrps,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
const nodemask_t nodemsk,
struct cpumask *nmsk,
struct node_groups *node_groups)
{
unsigned n, remaining_ncpus = 0;
for (n = 0; n < nr_node_ids; n++) {
node_groups[n].id = n;
node_groups[n].ncpus = UINT_MAX;
}
for_each_node_mask(n, nodemsk) {
unsigned ncpus;
cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
ncpus = cpumask_weight(nmsk);
if (!ncpus)
continue;
remaining_ncpus += ncpus;
node_groups[n].ncpus = ncpus;
}
numgrps = min_t(unsigned, remaining_ncpus, numgrps);
sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
ncpus_cmp_func, NULL);
/*
	 * Allocate groups for each node according to the ratio of this
	 * node's nr_cpus to the remaining un-assigned CPUs. 'numgrps' is
	 * bigger than the number of active NUMA nodes. Always start the
	 * allocation from the node with the smallest nr_cpus.
	 *
	 * This guarantees that each active node gets at least one group:
	 * over-allocation only happens for a node that ends up with a
	 * single group, so every other node still receives >= 1 group,
	 * since 'numgrps' is bigger than the number of NUMA nodes.
*
	 * A key invariant is that the number of groups allocated to each
	 * node is <= the CPU count of that node:
*
* 1) suppose there are two nodes: A and B
* ncpu(X) is CPU count of node X
* grps(X) is the group count allocated to node X via this
* algorithm
*
* ncpu(A) <= ncpu(B)
* ncpu(A) + ncpu(B) = N
* grps(A) + grps(B) = G
*
* grps(A) = max(1, round_down(G * ncpu(A) / N))
* grps(B) = G - grps(A)
*
* both N and G are integer, and 2 <= G <= N, suppose
* G = N - delta, and 0 <= delta <= N - 2
*
* 2) obviously grps(A) <= ncpu(A) because:
*
* if grps(A) is 1, then grps(A) <= ncpu(A) given
* ncpu(A) >= 1
*
* otherwise,
* grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N
*
* 3) prove how grps(B) <= ncpu(B):
*
	 *   if round_down(G * ncpu(A) / N) == 0, grps(B) won't be
* over-allocated, so grps(B) <= ncpu(B),
*
* otherwise:
*
* grps(A) =
* round_down(G * ncpu(A) / N) =
* round_down((N - delta) * ncpu(A) / N) =
* round_down((N * ncpu(A) - delta * ncpu(A)) / N) >=
* round_down((N * ncpu(A) - delta * N) / N) =
	 *   ncpu(A) - delta
*
* then:
*
* grps(A) - G >= ncpu(A) - delta - G
* =>
* G - grps(A) <= G + delta - ncpu(A)
* =>
* grps(B) <= N - ncpu(A)
* =>
	 *   grps(B) <= ncpu(B)
*
	 * For >= 3 nodes, the situation can be viewed as one node versus
	 * the combination of the remaining nodes, which is exactly how the
	 * algorithm operates: 'remaining_ncpus' and 'numgrps' are
	 * re-calculated at every step, so finally for each node X:
	 * grps(X) <= ncpu(X).
*
*/
for (n = 0; n < nr_node_ids; n++) {
unsigned ngroups, ncpus;
if (node_groups[n].ncpus == UINT_MAX)
continue;
WARN_ON_ONCE(numgrps == 0);
ncpus = node_groups[n].ncpus;
ngroups = max_t(unsigned, 1,
numgrps * ncpus / remaining_ncpus);
WARN_ON_ONCE(ngroups > ncpus);
node_groups[n].ngroups = ngroups;
remaining_ncpus -= ncpus;
numgrps -= ngroups;
}
}
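To make the ratio above concrete, a worked example with invented numbers: for two nodes with ncpu(A) = 2 and ncpu(B) = 6 (so N = 8) and G = 4 requested groups, grps(A) = max(1, round_down(4 * 2 / 8)) = 1 and grps(B) = 4 - 1 = 3, so both nodes satisfy grps(X) <= ncpu(X).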
static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
struct cpumask *nmsk, struct cpumask *masks)
{
unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
unsigned int last_grp = numgrps;
unsigned int curgrp = startgrp;
nodemask_t nodemsk = NODE_MASK_NONE;
struct node_groups *node_groups;
if (cpumask_empty(cpu_mask))
return 0;
nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
/*
	 * If the number of nodes in the mask is greater than or equal to the
	 * number of groups, just spread the groups across the nodes.
*/
if (numgrps <= nodes) {
for_each_node_mask(n, nodemsk) {
/* Ensure that only CPUs which are in both masks are set */
cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
cpumask_or(&masks[curgrp], &masks[curgrp], nmsk);
if (++curgrp == last_grp)
curgrp = 0;
}
return numgrps;
}
node_groups = kcalloc(nr_node_ids,
sizeof(struct node_groups),
GFP_KERNEL);
if (!node_groups)
return -ENOMEM;
/* allocate group number for each node */
alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
nodemsk, nmsk, node_groups);
for (i = 0; i < nr_node_ids; i++) {
unsigned int ncpus, v;
struct node_groups *nv = &node_groups[i];
if (nv->ngroups == UINT_MAX)
continue;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
ncpus = cpumask_weight(nmsk);
if (!ncpus)
continue;
WARN_ON_ONCE(nv->ngroups > ncpus);
/* Account for rounding errors */
extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
/* Spread allocated groups on CPUs of the current node */
for (v = 0; v < nv->ngroups; v++, curgrp++) {
cpus_per_grp = ncpus / nv->ngroups;
/* Account for extra groups to compensate rounding errors */
if (extra_grps) {
cpus_per_grp++;
--extra_grps;
}
/*
* wrapping has to be considered given 'startgrp'
* may start anywhere
*/
if (curgrp >= last_grp)
curgrp = 0;
grp_spread_init_one(&masks[curgrp], nmsk,
cpus_per_grp);
}
done += nv->ngroups;
}
kfree(node_groups);
return done;
}
/**
* group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality
* @numgrps: number of groups
*
 * Return: cpumask array on success, NULL otherwise. Each element of the
 * array contains the CPUs assigned to that group.
 *
 * Try to put CPUs that are close in terms of CPU and NUMA locality into
 * the same group, using two-stage grouping:
 * 1) spread present CPUs evenly across the groups first
 * 2) spread the remaining possible CPUs evenly across the groups
 *
 * The resulting grouping guarantees that all CPUs are covered and that
 * no CPU is assigned to more than one group.
*/
struct cpumask *group_cpus_evenly(unsigned int numgrps)
{
unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
cpumask_var_t *node_to_cpumask;
cpumask_var_t nmsk, npresmsk;
int ret = -ENOMEM;
struct cpumask *masks = NULL;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
goto fail_nmsk;
node_to_cpumask = alloc_node_to_cpumask();
if (!node_to_cpumask)
goto fail_npresmsk;
masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
if (!masks)
goto fail_node_to_cpumask;
/* Stabilize the cpumasks */
cpus_read_lock();
build_node_to_cpumask(node_to_cpumask);
/* grouping present CPUs first */
ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
cpu_present_mask, nmsk, masks);
if (ret < 0)
goto fail_build_affinity;
nr_present = ret;
/*
	 * Group the non-present CPUs, starting from the next group to be
	 * handled. If grouping the present CPUs already exhausted the
	 * group space, assign the non-present CPUs to the groups that
	 * were already allocated.
*/
if (nr_present >= numgrps)
curgrp = 0;
else
curgrp = nr_present;
cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
npresmsk, nmsk, masks);
if (ret >= 0)
nr_others = ret;
fail_build_affinity:
cpus_read_unlock();
if (ret >= 0)
WARN_ON(nr_present + nr_others < numgrps);
fail_node_to_cpumask:
free_node_to_cpumask(node_to_cpumask);
fail_npresmsk:
free_cpumask_var(npresmsk);
fail_nmsk:
free_cpumask_var(nmsk);
if (ret < 0) {
kfree(masks);
return NULL;
}
return masks;
}
#else /* CONFIG_SMP */
struct cpumask *group_cpus_evenly(unsigned int numgrps)
{
struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
if (!masks)
return NULL;
	/* Without SMP there is only CPU 0; assign all possible CPUs to the first group */
cpumask_copy(&masks[0], cpu_possible_mask);
return masks;
}
#endif /* CONFIG_SMP */
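A hedged caller sketch (hypothetical demo_* names; the real blk-mq call site is not part of this hunk): consuming the group_cpus_evenly() result, which is a kcalloc()'d array of @numgrps cpumasks that the caller must kfree():

	#include <linux/group_cpus.h>
	#include <linux/slab.h>

	static int demo_map_queues(unsigned int nr_queues)
	{
		struct cpumask *masks = group_cpus_evenly(nr_queues);
		unsigned int grp;

		if (!masks)
			return -ENOMEM;

		for (grp = 0; grp < nr_queues; grp++)
			pr_info("queue %u -> CPUs %*pbl\n", grp,
				cpumask_pr_args(&masks[grp]));

		kfree(masks);
		return 0;
	}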