Commit d7b6de14 authored by Linus Torvalds

Merge branch 'core/softlockup-for-linus' of...

Merge branch 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core/softlockup-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: fix invalid proc_handler for softlockup_panic
  softlockup: fix watchdog task wakeup frequency
  softlockup: fix watchdog task wakeup frequency
  softlockup: show irqtrace
  softlockup: print a module list on being stuck
  softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
  softlockup: fix false positives on nohz if CPU is 100% idle for more than 60 seconds
  softlockup: fix softlockup_thresh fix
  softlockup: fix softlockup_thresh unaligned access and disable detection at runtime
  softlockup: allow panic on lockup
parents 30d38542 4dca10a9
...@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file ...@@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file
snd-ymfpci= [HW,ALSA] snd-ymfpci= [HW,ALSA]
softlockup_panic=
[KNL] Should the soft-lockup detector generate panics.
sonypi.*= [HW] Sony Programmable I/O Control Device driver sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/sonypi.txt See Documentation/sonypi.txt
......
...@@ -295,10 +295,11 @@ extern void softlockup_tick(void); ...@@ -295,10 +295,11 @@ extern void softlockup_tick(void);
extern void spawn_softlockup_task(void); extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void); extern void touch_all_softlockup_watchdogs(void);
extern unsigned long softlockup_thresh; extern unsigned int softlockup_panic;
extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings; extern unsigned long sysctl_hung_task_warnings;
extern int softlockup_thresh;
#else #else
static inline void softlockup_tick(void) static inline void softlockup_tick(void)
{ {
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); ...@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
static DEFINE_PER_CPU(struct task_struct *, watchdog_task); static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
static int __read_mostly did_panic; static int __read_mostly did_panic;
unsigned long __read_mostly softlockup_thresh = 60; int __read_mostly softlockup_thresh = 60;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* soft-lockup occurs:
*/
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
/*
 * Handler for the "softlockup_panic=" kernel command-line option.
 *
 * Converts @str with simple_strtoul() using base 0 (radix chosen by
 * simple_strtoul from the string itself) and stores the result in
 * softlockup_panic, replacing its build-time initial value.
 *
 * No end pointer is requested (NULL), so malformed or trailing input is
 * not detected here, and the value is not range-checked.
 *
 * Always returns 1.
 * NOTE(review): 1 presumably signals to the __setup machinery that the
 * option was handled -- confirm against the __setup documentation.
 */
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
return 1;
}
/* Hook the handler up to the "softlockup_panic=" option string. */
__setup("softlockup_panic=", softlockup_panic_setup);
static int static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
...@@ -84,6 +100,14 @@ void softlockup_tick(void) ...@@ -84,6 +100,14 @@ void softlockup_tick(void)
struct pt_regs *regs = get_irq_regs(); struct pt_regs *regs = get_irq_regs();
unsigned long now; unsigned long now;
/* Is detection switched off? */
if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
/* Be sure we don't false trigger if switched back on */
if (touch_timestamp)
per_cpu(touch_timestamp, this_cpu) = 0;
return;
}
if (touch_timestamp == 0) { if (touch_timestamp == 0) {
__touch_softlockup_watchdog(); __touch_softlockup_watchdog();
return; return;
...@@ -92,11 +116,8 @@ void softlockup_tick(void) ...@@ -92,11 +116,8 @@ void softlockup_tick(void)
print_timestamp = per_cpu(print_timestamp, this_cpu); print_timestamp = per_cpu(print_timestamp, this_cpu);
/* report at most once a second */ /* report at most once a second */
if ((print_timestamp >= touch_timestamp && if (print_timestamp == touch_timestamp || did_panic)
print_timestamp < (touch_timestamp + 1)) ||
did_panic || !per_cpu(watchdog_task, this_cpu)) {
return; return;
}
/* do not print during early bootup: */ /* do not print during early bootup: */
if (unlikely(system_state != SYSTEM_RUNNING)) { if (unlikely(system_state != SYSTEM_RUNNING)) {
...@@ -106,8 +127,11 @@ void softlockup_tick(void) ...@@ -106,8 +127,11 @@ void softlockup_tick(void)
now = get_timestamp(this_cpu); now = get_timestamp(this_cpu);
/* Wake up the high-prio watchdog task every second: */ /*
if (now > (touch_timestamp + 1)) * Wake up the high-prio watchdog task twice per
* threshold timespan.
*/
if (now > touch_timestamp + softlockup_thresh/2)
wake_up_process(per_cpu(watchdog_task, this_cpu)); wake_up_process(per_cpu(watchdog_task, this_cpu));
/* Warn about unreasonable delays: */ /* Warn about unreasonable delays: */
...@@ -121,11 +145,15 @@ void softlockup_tick(void) ...@@ -121,11 +145,15 @@ void softlockup_tick(void)
this_cpu, now - touch_timestamp, this_cpu, now - touch_timestamp,
current->comm, task_pid_nr(current)); current->comm, task_pid_nr(current));
print_modules(); print_modules();
print_irqtrace_events(current);
if (regs) if (regs)
show_regs(regs); show_regs(regs);
else else
dump_stack(); dump_stack();
spin_unlock(&print_lock); spin_unlock(&print_lock);
if (softlockup_panic)
panic("softlockup: hung tasks");
} }
/* /*
...@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now) ...@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
t->last_switch_timestamp = now; t->last_switch_timestamp = now;
touch_nmi_watchdog(); touch_nmi_watchdog();
if (softlockup_panic)
panic("softlockup: blocked tasks");
} }
/* /*
......
...@@ -88,12 +88,13 @@ extern int rcutorture_runnable; ...@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */ /* Constants used for minimum and maximum */
#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) #if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1; static int one = 1;
#endif #endif
#ifdef CONFIG_DETECT_SOFTLOCKUP #ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60; static int sixty = 60;
static int neg_one = -1;
#endif #endif
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
...@@ -737,15 +738,26 @@ static struct ctl_table kern_table[] = { ...@@ -737,15 +738,26 @@ static struct ctl_table kern_table[] = {
}, },
#endif #endif
#ifdef CONFIG_DETECT_SOFTLOCKUP #ifdef CONFIG_DETECT_SOFTLOCKUP
{
.ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one,
},
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh", .procname = "softlockup_thresh",
.data = &softlockup_thresh, .data = &softlockup_thresh,
.maxlen = sizeof(unsigned long), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_doulongvec_minmax, .proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec, .strategy = &sysctl_intvec,
.extra1 = &one, .extra1 = &neg_one,
.extra2 = &sixty, .extra2 = &sixty,
}, },
{ {
......
...@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void) ...@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
if (!ts->tick_stopped) if (!ts->tick_stopped)
return; return;
touch_softlockup_watchdog();
cpu_clear(cpu, nohz_cpu_mask); cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get(); now = ktime_get();
ts->idle_waketime = now; ts->idle_waketime = now;
...@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void) ...@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
local_irq_save(flags); local_irq_save(flags);
tick_do_update_jiffies64(now); tick_do_update_jiffies64(now);
local_irq_restore(flags); local_irq_restore(flags);
touch_softlockup_watchdog();
} }
void tick_nohz_stop_idle(int cpu) void tick_nohz_stop_idle(int cpu)
......
...@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP ...@@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP
help help
Say Y here to enable the kernel to detect "soft lockups", Say Y here to enable the kernel to detect "soft lockups",
which are bugs that cause the kernel to loop in kernel which are bugs that cause the kernel to loop in kernel
mode for more than 10 seconds, without giving other tasks a mode for more than 60 seconds, without giving other tasks a
chance to run. chance to run.
When a soft-lockup is detected, the kernel will print the When a soft-lockup is detected, the kernel will print the
...@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP ...@@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP
can be detected via the NMI-watchdog, on platforms that can be detected via the NMI-watchdog, on platforms that
support it.) support it.)
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
depends on DETECT_SOFTLOCKUP
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
mode for more than 60 seconds, without giving other tasks a
chance to run.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
lockup has been detected. This feature is useful for
high-availability systems that have uptime guarantees and
where a lockup must be resolved ASAP.
Say N if unsure.
config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
int
depends on DETECT_SOFTLOCKUP
range 0 1
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
config SCHED_DEBUG config SCHED_DEBUG
bool "Collect scheduler debugging info" bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS depends on DEBUG_KERNEL && PROC_FS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment