Commit 24726275 authored by Eric DeVolder, committed by Andrew Morton

crash: add generic infrastructure for crash hotplug support

To support crash hotplug, a mechanism is needed to update the crash
elfcorehdr upon CPU or memory changes (e.g. hot un/plug or off/onlining).
The crash elfcorehdr describes the CPUs and memory to be written into the
vmcore.

To track CPU changes, callbacks are registered with the cpuhp mechanism
via cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN).  The crash hotplug
elfcorehdr update has no explicit ordering requirement (relative to other
cpuhp states), so meets the criteria for utilizing CPUHP_BP_PREPARE_DYN. 
CPUHP_BP_PREPARE_DYN is a dynamic state and avoids the need to introduce a
new state for crash hotplug.  Also, CPUHP_BP_PREPARE_DYN is the last state
in the PREPARE group, just prior to the STARTING group, which is very
close to the CPU starting up in a plug/online situation, or stopping in an
unplug/offline situation.  This minimizes the window of time during an
actual plug/online or unplug/offline situation in which the elfcorehdr
would be inaccurate.  Note that for a CPU being unplugged or offlined, the
CPU will still be present in the list of CPUs generated by
crash_prepare_elf64_headers().  However, there is no need to explicitly
omit the CPU, see justification in 'crash: change
crash_prepare_elf64_headers() to for_each_possible_cpu()'.

To track memory changes, a notifier is registered to capture the memblock
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The CPU callbacks and memory notifiers invoke crash_handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event() to update the
elfcorehdr with the current state of CPUs and memory.  During the process,
the kexec_lock is held.

Link: https://lkml.kernel.org/r/20230814214446.6659-3-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 6f991cc3
......@@ -104,4 +104,11 @@ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_ma
struct kimage;
struct kexec_segment;
/*
 * Hotplug actions. The crash hotplug handler stores one of these in
 * kimage->hp_action while an event is being processed and resets it to
 * KEXEC_CRASH_HP_NONE afterwards.
 */
#define KEXEC_CRASH_HP_NONE 0
#define KEXEC_CRASH_HP_ADD_CPU 1
#define KEXEC_CRASH_HP_REMOVE_CPU 2
#define KEXEC_CRASH_HP_ADD_MEMORY 3
#define KEXEC_CRASH_HP_REMOVE_MEMORY 4
/* CPU argument passed for events that do not concern a CPU (memory events) */
#define KEXEC_CRASH_HP_INVALID_CPU -1U
#endif /* LINUX_CRASH_CORE_H */
......@@ -33,6 +33,7 @@ extern note_buf_t __percpu *crash_notes;
#include <linux/compat.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <asm/kexec.h>
/* Verify architecture specific macros are defined */
......@@ -345,6 +346,12 @@ struct kimage {
struct purgatory_info purgatory_info;
#endif
#ifdef CONFIG_CRASH_HOTPLUG
int hp_action;
int elfcorehdr_index;
bool elfcorehdr_updated;
#endif
#ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
......@@ -475,6 +482,10 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
#endif
#ifndef arch_crash_handle_hotplug_event
/*
 * Fallback used when the architecture does not define
 * arch_crash_handle_hotplug_event: the hotplug event is ignored.
 */
static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
#endif
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
......
......@@ -112,4 +112,35 @@ config CRASH_DUMP
For s390, this option also enables zfcpdump.
See also <file:Documentation/s390/zfcpdump.rst>
config CRASH_HOTPLUG
bool "Update the crash elfcorehdr on system configuration changes"
default y
depends on CRASH_DUMP && (HOTPLUG_CPU || MEMORY_HOTPLUG)
depends on ARCH_SUPPORTS_CRASH_HOTPLUG
help
Enable direct update to the crash elfcorehdr (which contains
the list of CPUs and memory regions to be dumped upon a crash)
in response to hot plug/unplug or online/offline of CPUs or
memory. This is a much more advanced approach than userspace
attempting that.
If unsure, say Y.
config CRASH_MAX_MEMORY_RANGES
int "Specify the maximum number of memory regions for the elfcorehdr"
default 8192
depends on CRASH_HOTPLUG
help
For the kexec_file_load() syscall path, specify the maximum number of
memory regions that the elfcorehdr buffer/segment can accommodate.
These regions are obtained via walk_system_ram_res(); eg. the
'System RAM' entries in /proc/iomem.
This value is combined with NR_CPUS_DEFAULT and multiplied by
sizeof(Elf64_Phdr) to determine the final elfcorehdr memory buffer/
segment size.
The value 8192, for example, covers a (sparsely populated) 1TiB system
consisting of 128MiB memblocks, while resulting in an elfcorehdr
memory buffer/segment size under 1MiB. This represents a sane choice
to accommodate both baremetal and virtual machine configurations.
endmenu
......@@ -11,6 +11,8 @@
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/cpuhotplug.h>
#include <asm/page.h>
#include <asm/sections.h>
......@@ -18,6 +20,7 @@
#include <crypto/sha1.h>
#include "kallsyms_internal.h"
#include "kexec_internal.h"
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
......@@ -733,3 +736,142 @@ static int __init crash_notes_memory_init(void)
return 0;
}
subsys_initcall(crash_notes_memory_init);
#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
/*
 * To accurately reflect hot un/plug changes of cpu and memory resources
 * (including onlining and offlining of those resources), the elfcorehdr
 * (which is passed to the crash kernel via the elfcorehdr= parameter)
 * must be updated with the new list of CPUs and memories.
 *
 * In order to make changes to elfcorehdr, two conditions are needed:
 * First, the segment containing the elfcorehdr must be large enough
 * to permit a growing number of resources; the elfcorehdr memory size
 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
 * Second, purgatory must explicitly exclude the elfcorehdr from the
 * list of segments it checks (since the elfcorehdr changes and thus
 * would require an update to purgatory itself to update the digest).
 *
 * @hp_action: one of the KEXEC_CRASH_HP_* actions describing the event.
 * @cpu: the CPU concerned for CPU events (only used for debug output
 *       here); KEXEC_CRASH_HP_INVALID_CPU for memory events.
 *
 * If the kexec lock cannot be taken, or no crash image is loaded, or the
 * elfcorehdr segment cannot be found, the event is dropped without
 * invoking the architecture handler.
 */
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
{
	struct kimage *image;

	/* Obtain lock while changing crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		return;
	}

	/* Nothing to update if no kdump (crash) image is loaded */
	if (!kexec_crash_image)
		goto out;

	image = kexec_crash_image;

	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
		hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
	else
		pr_debug("hp_action %u\n", hp_action);

	/*
	 * The elfcorehdr_index is set to -1 when the struct kimage
	 * is allocated. Find the segment containing the elfcorehdr,
	 * if not already found.
	 */
	if (image->elfcorehdr_index < 0) {
		unsigned long mem;
		unsigned char *ptr;
		unsigned int n;

		for (n = 0; n < image->nr_segments; n++) {
			mem = image->segment[n].mem;
			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
			if (ptr) {
				/* The segment containing elfcorehdr */
				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
					image->elfcorehdr_index = (int)n;
				kunmap_local(ptr);
			}
		}
	}

	if (image->elfcorehdr_index < 0) {
		/* Terminate the message so it is not merged with the next one */
		pr_err("unable to locate elfcorehdr segment\n");
		goto out;
	}

	/* Needed in order for the segments to be updated */
	arch_kexec_unprotect_crashkres();

	/* Differentiate between normal load and hotplug update */
	image->hp_action = hp_action;

	/* Now invoke arch-specific update handler */
	arch_crash_handle_hotplug_event(image);

	/* No longer handling a hotplug event */
	image->hp_action = KEXEC_CRASH_HP_NONE;
	image->elfcorehdr_updated = true;

	/* Change back to read-only */
	arch_kexec_protect_crashkres();

	/* Errors in the callback are not a reason to rollback state */
out:
	/* Release lock now that update complete */
	kexec_unlock();
}
/*
 * Memory notifier callback: translate MEM_ONLINE / MEM_OFFLINE into the
 * matching crash hotplug action and hand it to
 * crash_handle_hotplug_event(). Any other notification is ignored.
 * Always returns NOTIFY_OK.
 */
static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
	unsigned int action;

	if (val == MEM_ONLINE)
		action = KEXEC_CRASH_HP_ADD_MEMORY;
	else if (val == MEM_OFFLINE)
		action = KEXEC_CRASH_HP_REMOVE_MEMORY;
	else
		return NOTIFY_OK;

	/* Memory events carry no CPU, so pass the invalid-CPU marker */
	crash_handle_hotplug_event(action, KEXEC_CRASH_HP_INVALID_CPU);

	return NOTIFY_OK;
}
/* Notifier block handed to register_memory_notifier() at init time */
static struct notifier_block crash_memhp_nb = {
	.priority = 0,
	.notifier_call = crash_memhp_notifier,
};
/*
 * cpuhp startup callback: report @cpu as added to the crash hotplug
 * handler. Always returns 0.
 */
static int crash_cpuhp_online(unsigned int cpu)
{
crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
return 0;
}
/*
 * cpuhp teardown callback: report @cpu as removed to the crash hotplug
 * handler. Always returns 0.
 */
static int crash_cpuhp_offline(unsigned int cpu)
{
crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
return 0;
}
/*
 * Hook the crash hotplug handler into the memory notifier chain (when
 * CONFIG_MEMORY_HOTPLUG is enabled) and into the cpuhp state machine
 * (when CONFIG_HOTPLUG_CPU is enabled).
 *
 * Returns 0 on success, or the negative error code reported by
 * cpuhp_setup_state_nocalls().
 */
static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
		/*
		 * For a dynamic state the call returns the allocated
		 * (positive) state number on success. That is not an error,
		 * so do not hand a non-zero value back to the initcall core.
		 */
		if (result > 0)
			result = 0;
	}

	return result;
}
subsys_initcall(crash_hotplug_init);
#endif
......@@ -274,6 +274,12 @@ struct kimage *do_kimage_alloc_init(void)
/* Initialize the list of unusable pages */
INIT_LIST_HEAD(&image->unusable_pages);
#ifdef CONFIG_CRASH_HOTPLUG
image->hp_action = KEXEC_CRASH_HP_NONE;
image->elfcorehdr_index = -1;
image->elfcorehdr_updated = false;
#endif
return image;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment