Commit 5dd7d1b6, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] hangcheck-timer

Patch from: Joel Becker <Joel.Becker@oracle.com>

This kernel module will detect long durations when jiffies has failed to
increment, and will reboot the machine in response.

Joel says:


"Here's why Oracle wants such a thing.  We run clusters.  Imagine a two node
 cluster.  Node1 pauses completely for some reason.  There are multiple
 reasons this can happen.  A bad driver can udelay() for 90 seconds (qla used
 to do this).  zVM on S/390 can page Linux out for minutes at a time.
 Anything that causes the box to freeze.  Jiffies does *not* count during
 this, so when Node1 returns it feels that no time has passed.

 Node2, however, has been counting time.  When Node1 goes away, the Oracle
 cluster manager starts looking for it.  After a timeout, it gives up.  It
 then recovers any in-progress transactions from Node1.  After that, it
 starts new operations, modifying the data in ways that Node1 has no idea
 about (it's still out to lunch).

 When Node1 finally returns (udelay() ends, zVM pages it in, whatever), any
 I/O that it has queued or is about to queue will get sent to the disk.
 Oops, you've just corrupted your shared data.

 hangcheck-timer should catch this and reboot the box.

 This is why Oracle wants this driver.  We figure that such functionality
 would be beneficial to others as well, so we posted to l-k.  We'd all hope
 that driver writers don't udelay() for 90s, but S/390 with zVM is still
 around.  Some folks might want to notice when it happens.  I am sure other
 things exist that trigger the same symptoms."
parent 46052b73
...@@ -992,5 +992,12 @@ config RAW_DRIVER ...@@ -992,5 +992,12 @@ config RAW_DRIVER
Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O.
See the raw(8) manpage for more details. See the raw(8) manpage for more details.
config HANGCHECK_TIMER
tristate "Hangcheck timer"
help
The hangcheck-timer module detects when the system has gone
out to lunch past a certain margin. It can reboot the system
or merely print a warning.
endmenu endmenu
...@@ -77,6 +77,7 @@ obj-$(CONFIG_DRM) += drm/ ...@@ -77,6 +77,7 @@ obj-$(CONFIG_DRM) += drm/
obj-$(CONFIG_PCMCIA) += pcmcia/ obj-$(CONFIG_PCMCIA) += pcmcia/
obj-$(CONFIG_IPMI_HANDLER) += ipmi/ obj-$(CONFIG_IPMI_HANDLER) += ipmi/
obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o
# Files generated that shall be removed upon make clean # Files generated that shall be removed upon make clean
clean-files := consolemap_deftbl.c defkeymap.c qtronixmap.c clean-files := consolemap_deftbl.c defkeymap.c qtronixmap.c
......
/*
* hangcheck-timer.c
*
* Driver for a little io fencing timer.
*
* Copyright (C) 2002 Oracle Corporation. All rights reserved.
*
* Author: Joel Becker <joel.becker@oracle.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* The hangcheck-timer driver uses the TSC to catch delays that
* jiffies does not notice. A timer is set. When the timer fires, it
* checks whether it was delayed and if that delay exceeds a given
* margin of error. The hangcheck_tick module parameter takes the timer
* duration in seconds. The hangcheck_margin parameter defines the
* margin of error, in seconds. The defaults are 180 seconds for the
* timer and 60 seconds for the margin of error. IOW, a timer is set
* for 180 seconds. When the timer fires, the callback checks the
* actual duration that the timer waited. If the duration exceeds the
* allotted time and margin (here 180 + 60, or 240 seconds), the machine
* is restarted if hangcheck_reboot is nonzero; otherwise only a warning
* is printed. A healthy machine will have the duration match the
* expected timeout very closely.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <asm/uaccess.h>
/* Version string reported in the startup message printed by hangcheck_init(). */
#define VERSION_STR "0.5.0"
#define DEFAULT_IOFENCE_MARGIN 60 /* Default fudge factor, in seconds */
#define DEFAULT_IOFENCE_TICK 180 /* Default timer timeout, in seconds */
/*
 * Tunables, settable as module parameters (declared below).
 * hangcheck_tick and hangcheck_margin are in seconds: the tick is
 * multiplied by HZ when the timer is armed, and both are summed and
 * scaled in hangcheck_init() to form the trigger threshold.
 */
static int hangcheck_tick = DEFAULT_IOFENCE_TICK;
static int hangcheck_margin = DEFAULT_IOFENCE_MARGIN;
static int hangcheck_reboot; /* Defaults to not reboot */
/* Driver options */
module_param(hangcheck_tick, int, 0);
MODULE_PARM_DESC(hangcheck_tick, "Timer delay.");
module_param(hangcheck_margin, int, 0);
MODULE_PARM_DESC(hangcheck_margin, "If the hangcheck timer has been delayed more than hangcheck_margin seconds, the driver will fire.");
module_param(hangcheck_reboot, int, 0);
MODULE_PARM_DESC(hangcheck_reboot, "If nonzero, the machine will reboot when the timer margin is exceeded.");
MODULE_AUTHOR("Joel Becker");
MODULE_DESCRIPTION("Hangcheck-timer detects when the system has gone out to lunch past a certain margin.");
MODULE_LICENSE("GPL");
/*
 * hangcheck_tsc: get_cycles() value sampled each time the timer is
 * armed (in hangcheck_init() and at the end of hangcheck_fire()).
 * hangcheck_tsc_margin: trigger threshold, computed once in
 * hangcheck_init() as (margin + tick) * HZ * loops_per_jiffy.
 */
static unsigned long long hangcheck_tsc, hangcheck_tsc_margin;
/* Forward declaration: the timer initializer below needs the callback. */
static void hangcheck_fire(unsigned long);
/*
 * The single hangcheck timer. Its callback is hangcheck_fire(); the
 * other two initializer arguments are 0, and the real expiry is set
 * by mod_timer() in hangcheck_init().
 */
static struct timer_list hangcheck_ticktock =
TIMER_INITIALIZER(hangcheck_fire, 0, 0);
/*
 * hangcheck_fire - timer callback that checks how late the timer ran.
 * @data: timer cookie; unused.
 *
 * Compares the current get_cycles() value against the one recorded when
 * the timer was last armed. If the difference exceeds
 * hangcheck_tsc_margin, either restarts the machine (hangcheck_reboot
 * nonzero) or prints a critical warning. In the non-restart paths the
 * timer is then re-armed for another hangcheck_tick seconds and the
 * reference cycle count is refreshed.
 */
static void hangcheck_fire(unsigned long data)
{
	unsigned long long cur_tsc, tsc_diff;

	cur_tsc = get_cycles();

	/*
	 * Unsigned subtraction is performed modulo 2^64, so this yields the
	 * elapsed count even when the 64-bit value has wrapped past zero.
	 * The previous open-coded wrap branch was off by one and, when the
	 * two samples were equal, produced ~0ULL and a spurious trigger.
	 * NOTE(review): if get_cycles() returns a type narrower than 64 bits
	 * on some architecture, a wrap of that narrower counter is still not
	 * compensated for here -- confirm against the target's cycles_t.
	 */
	tsc_diff = cur_tsc - hangcheck_tsc;

	if (tsc_diff > hangcheck_tsc_margin) {
		if (hangcheck_reboot) {
			printk(KERN_CRIT "Hangcheck: hangcheck is restarting the machine.\n");
			machine_restart(NULL);
		} else {
			printk(KERN_CRIT "Hangcheck: hangcheck value past margin!\n");
		}
	}

	/* Re-arm first, then sample, matching the order used in hangcheck_init(). */
	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
	hangcheck_tsc = get_cycles();
}
/*
 * hangcheck_init - module entry point.
 *
 * Logs the configured tick and margin, converts (margin + tick) seconds
 * into the threshold stored in hangcheck_tsc_margin, records the current
 * cycle count, and arms the timer for hangcheck_tick seconds from now.
 *
 * Returns 0 unconditionally.
 */
static int __init hangcheck_init(void)
{
	printk("Hangcheck: starting hangcheck timer %s (tick is %d seconds, margin is %d seconds).\n",
	       VERSION_STR, hangcheck_tick, hangcheck_margin);

	/*
	 * Widen before adding: both operands are int module parameters, and
	 * adding them as int could overflow (undefined behaviour) before the
	 * result is stored in the 64-bit threshold.
	 */
	hangcheck_tsc_margin =
		(unsigned long long)hangcheck_margin + hangcheck_tick;
	hangcheck_tsc_margin *= HZ;
	/*
	 * NOTE(review): loops_per_jiffy is used here as the per-jiffy scale
	 * for get_cycles() values -- confirm the two are in comparable units
	 * on the target CPU.
	 */
	hangcheck_tsc_margin *= current_cpu_data.loops_per_jiffy;

	hangcheck_tsc = get_cycles();
	mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));

	return 0;
}
/*
 * hangcheck_exit - module exit point.
 *
 * Removes the hangcheck timer. hangcheck_fire() re-arms the timer from
 * within its own callback, so the _sync variant of del_timer is used
 * here rather than plain del_timer().
 */
static void __exit hangcheck_exit(void)
{
del_timer_sync(&hangcheck_ticktock);
}
module_init(hangcheck_init);
module_exit(hangcheck_exit);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment