Commit e3ce78c4 authored by Guilherme G. Piccoli, committed by Khalid Elmously

UBUNTU: SAUCE: x86/quirks: Add parameter to clear MSIs early on boot

BugLink: https://bugs.launchpad.net/bugs/1797990

We observed a kdump failure on x86 that was narrowed down to an MSI irq
storm coming from a PCI network device. The bug manifests as a lack of
progress in the boot process of the kdump kernel, and a flood of kernel
messages like:

[...]
[ 342.265294] do_IRQ: 0.155 No irq handler for vector
[ 342.266916] do_IRQ: 0.155 No irq handler for vector
[ 347.258422] do_IRQ: 14053260 callbacks suppressed
[...]

The root cause of the issue is that the kexec process of the kdump kernel
doesn't ensure PCI devices are reset or MSI capabilities are disabled,
so a PCI adapter could produce a huge amount of irqs which would steal
all the processing time for the CPU (especially since we usually restrict
the kdump kernel to use a single CPU only).

This patch implements the kernel parameter "pci=clearmsi" to clear the
MSI/MSI-X enable bits in the Message Control register for all PCI devices
during early boot time, thus preventing potential issues in the kexec'ed
kernel. PCI spec also supports/enforces this need (see PCI Local Bus
spec sections 6.8.1.3 and 6.8.2.3).
Suggested-by: Dan Streetman <ddstreet@canonical.com>
Suggested-by: Gavin Shan <shan.gavin@linux.alibaba.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
[mfo: backport to ubuntu-xenial:
 - different path for Documentation/.../kernel-parameters.txt
 - update context lines in pci-direct.h and early-quirks.c]
Signed-off-by: Mauricio Faria de Oliveira <mfo@canonical.com>
Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
Acked-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
parent fbbc8548
......@@ -2867,6 +2867,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nomsi [MSI] If the PCI_MSI kernel config parameter is
enabled, this kernel boot option can be used to
disable the use of MSI interrupts system-wide.
clearmsi [X86] Clears MSI/MSI-X enable bits early in boot
time in order to avoid issues like adapters
screaming irqs and preventing boot progress.
Also, it enforces the PCI Local Bus spec
rule that those bits should be 0 in system reset
events (useful for kexec/kdump cases).
noioapicquirk [APIC] Disable all boot interrupt quirks.
Safety option to keep boot IRQs enabled. This
should never be necessary.
......
......@@ -14,6 +14,7 @@ extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val);
extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val);
extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
/*
 * Non-zero when the "clearmsi" PCI boot option was given; the early quirk
 * code then clears the MSI/MSI-X enable bits of the devices it visits.
 */
extern unsigned int pci_early_clear_msi;
extern int early_pci_allowed(void);
extern unsigned int pci_early_dump_regs;
......
......@@ -27,6 +27,37 @@
#include <asm/irq_remapping.h>
#include <asm/early_ioremap.h>
static void __init early_pci_clear_msi(int bus, int slot, int func)
{
int pos;
u16 ctrl;
if (likely(!pci_early_clear_msi))
return;
pr_info_once("Clearing MSI/MSI-X enable bits early in boot (quirk)\n");
pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSI);
if (pos) {
ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS);
ctrl &= ~PCI_MSI_FLAGS_ENABLE;
write_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS, ctrl);
/* Read again to flush previous write */
ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS);
}
pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSIX);
if (pos) {
ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS);
ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
write_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS, ctrl);
/* Read again to flush previous write */
ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS);
}
}
/*
 * Print an error message prefixed with a PCI address in domain 0000.
 * The expansion refers to `bus`, `slot` and `func` directly, so variables
 * with those names must be in scope wherever the macro is used.
 */
#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
static void __init fix_hypertransport_config(int num, int slot, int func)
......@@ -701,6 +732,7 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, early_pci_clear_msi},
{}
};
......
......@@ -35,6 +35,7 @@ int noioapicreroute = 1;
#endif
int pcibios_last_bus = -1;
unsigned long pirq_table_addr;
/*
 * Set to 1 by pcibios_setup() when the "clearmsi" option is parsed;
 * zero otherwise.
 */
unsigned int pci_early_clear_msi;
const struct pci_raw_ops *__read_mostly raw_pci_ops;
const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
......@@ -621,6 +622,9 @@ char *__init pcibios_setup(char *str)
} else if (!strcmp(str, "skip_isa_align")) {
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
return NULL;
} else if (!strcmp(str, "clearmsi")) {
pci_early_clear_msi = 1;
return NULL;
} else if (!strcmp(str, "noioapicquirk")) {
noioapicquirk = 1;
return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment