Commit 9578658e authored by Neil Brown's avatar Neil Brown Committed by Linus Torvalds

[PATCH] md: Fulltime delayed 'safe_mode' for md

From: Angus Sawyer <angus.sawyer@dsl.pipex.com>

If there are no writes for 20 milliseconds, write out superblock
to mark array as clean.  Write out superblock with
dirty flag before allowing any further write to succeed.

If an md thread gets signaled with SIGKILL, reduce the
delay to 0.


Also tidy up some printk's and make sure writing the
superblock isn't noisy.
parent 8eec7ce0
......@@ -219,6 +219,7 @@ static mddev_t * mddev_find(int unit)
init_MUTEX(&new->reconfig_sem);
INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
blk_queue_make_request(&new->queue, md_fail_request);
......@@ -701,10 +702,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->recovery_cp = mddev->recovery_cp;
sb->cp_events_hi = (mddev->events>>32);
sb->cp_events_lo = (u32)mddev->events;
if (mddev->recovery_cp == MaxSector) {
printk(KERN_INFO "md: marking sb clean...\n");
if (mddev->recovery_cp == MaxSector)
sb->state = (1<< MD_SB_CLEAN);
}
} else
sb->recovery_cp = 0;
......@@ -1005,7 +1004,7 @@ static int write_disk_sb(mdk_rdev_t * rdev)
sb_offset = calc_dev_sboffset(rdev->bdev);
if (rdev->sb_offset != sb_offset) {
printk(KERN_INFO "%s's sb offset has changed from %llu to %llu, skipping\n",
printk("%s's sb offset has changed from %llu to %llu, skipping\n",
bdev_partition_name(rdev->bdev),
(unsigned long long)rdev->sb_offset,
(unsigned long long)sb_offset);
......@@ -1025,7 +1024,7 @@ static int write_disk_sb(mdk_rdev_t * rdev)
goto skip;
}
printk(KERN_INFO "(write) %s's sb offset: %llu\n", bdev_partition_name(rdev->bdev), (unsigned long long)sb_offset);
dprintk("(write) %s's sb offset: %llu\n", bdev_partition_name(rdev->bdev), (unsigned long long)sb_offset);
if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
goto fail;
......@@ -1076,20 +1075,20 @@ static void md_update_sb(mddev_t * mddev)
if (!mddev->persistent)
return;
printk(KERN_INFO "md: updating md%d RAID superblock on device (in sync %d)\n",
dprintk(KERN_INFO "md: updating md%d RAID superblock on device (in sync %d)\n",
mdidx(mddev),mddev->in_sync);
err = 0;
ITERATE_RDEV(mddev,rdev,tmp) {
printk(KERN_INFO "md: ");
dprintk(KERN_INFO "md: ");
if (rdev->faulty)
printk("(skipping faulty ");
dprintk("(skipping faulty ");
printk("%s ", bdev_partition_name(rdev->bdev));
dprintk("%s ", bdev_partition_name(rdev->bdev));
if (!rdev->faulty) {
err += write_disk_sb(rdev);
} else
printk(")\n");
dprintk(")\n");
if (!err && mddev->level == LEVEL_MULTIPATH)
/* only need to write one superblock... */
break;
......@@ -1377,6 +1376,16 @@ static struct gendisk *md_probe(dev_t dev, int *part, void *data)
return NULL;
}
void md_wakeup_thread(mdk_thread_t *thread);
static void md_safemode_timeout(unsigned long data)
{
mddev_t *mddev = (mddev_t *) data;
mddev->safemode = 1;
md_wakeup_thread(mddev->thread);
}
#define TOO_BIG_CHUNKSIZE KERN_ERR \
"too big chunk_size: %d > %d\n"
......@@ -1518,9 +1527,9 @@ static int do_md_run(mddev_t * mddev)
}
atomic_set(&mddev->writes_pending,0);
mddev->safemode = 0;
if (mddev->pers->sync_request)
mddev->in_sync = 0;
else
mddev->safemode_timer.function = md_safemode_timeout;
mddev->safemode_timer.data = (unsigned long) mddev;
mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */
mddev->in_sync = 1;
md_update_sb(mddev);
......@@ -1551,7 +1560,6 @@ static int restart_array(mddev_t *mddev)
goto out;
mddev->safemode = 0;
mddev->in_sync = 0;
md_update_sb(mddev);
mddev->ro = 0;
set_disk_ro(disk, 0);
......@@ -1597,6 +1605,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
mddev->sync_thread = NULL;
}
del_timer_sync(&mddev->safemode_timer);
invalidate_device(mk_kdev(disk->major, disk->first_minor), 1);
if (ro) {
......@@ -1886,7 +1896,7 @@ static int get_array_info(mddev_t * mddev, void * arg)
info.utime = mddev->utime;
info.state = 0;
if (mddev->recovery_cp == MaxSector)
if (mddev->in_sync)
info.state = (1<<MD_SB_CLEAN);
info.active_disks = active;
info.working_disks = working;
......@@ -2896,13 +2906,14 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
void md_write_start(mddev_t *mddev)
{
if (mddev->safemode && !atomic_read(&mddev->writes_pending)) {
if (!atomic_read(&mddev->writes_pending)) {
mddev_lock_uninterruptible(mddev);
atomic_inc(&mddev->writes_pending);
if (mddev->in_sync) {
mddev->in_sync = 0;
del_timer(&mddev->safemode_timer);
md_update_sb(mddev);
}
atomic_inc(&mddev->writes_pending);
mddev_unlock(mddev);
} else
atomic_inc(&mddev->writes_pending);
......@@ -2910,12 +2921,16 @@ void md_write_start(mddev_t *mddev)
void md_write_end(mddev_t *mddev)
{
if (atomic_dec_and_test(&mddev->writes_pending) && mddev->safemode)
if (atomic_dec_and_test(&mddev->writes_pending)) {
if (mddev->safemode == 2)
md_wakeup_thread(mddev->thread);
else
mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
}
}
static inline void md_enter_safemode(mddev_t *mddev)
{
mddev_lock_uninterruptible(mddev);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
......@@ -2923,13 +2938,16 @@ static inline void md_enter_safemode(mddev_t *mddev)
md_update_sb(mddev);
}
mddev_unlock(mddev);
if (mddev->safemode == 1)
mddev->safemode = 0;
}
void md_handle_safemode(mddev_t *mddev)
{
if (signal_pending(current)) {
printk(KERN_INFO "md: md%d in safe mode\n",mdidx(mddev));
mddev->safemode= 1;
printk(KERN_INFO "md: md%d in immediate safe mode\n",mdidx(mddev));
mddev->safemode = 2;
flush_signals(current);
}
if (mddev->safemode)
......
......@@ -239,9 +239,11 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;
int safemode; /* if set, update "clean" superblock
unsigned int safemode; /* if set, update "clean" superblock
* when no writes pending.
*/
unsigned int safemode_delay;
struct timer_list safemode_timer;
atomic_t writes_pending;
request_queue_t queue; /* for plugging ... */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment