Commit 75f19a40, authored by Andrew Morton, committed by Jaroslav Kysela

[PATCH] Add a sync_fs super_block operation

This is infrastructure for fixing the journalled-data ext3 unmount data
loss problem. It was sent for comment to linux-fsdevel a week ago; there
was none.

Add a `sync_fs' superblock operation whose mandate is to perform
filesystem-specific operations to ensure a successful sync.

It is called in two places:

1: fsync_super() - for umount.

2: sys_sync() - for global sync.

In the sys_sync() case we call all the ->write_super() methods first.
write_super() is an async flushing operation.  It should not block.

After that, we call all the ->sync_fs functions.  This is independent
of the state of s_dirt!  Previously the two were conflated; in this
patch ->write_super() and ->sync_fs() are quite separate.

With ext3 as an example, the initial ->write_super() will start a
transaction, but will not wait on it.  (But only if s_dirt was set!)

The first ->sync_fs() call will get the IO underway.

The second ->sync_fs() call will wait on the IO.

And we really do need to be this elaborate, because all the testing of
s_dirt in there makes ->write_super() an unreliable way of detecting
when the VFS is trying to sync the filesystem.
parent 7404e32c
......@@ -92,6 +92,7 @@ prototypes:
void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs) (struct super_block *sb, int wait);
int (*statfs) (struct super_block *, struct statfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *);
......@@ -108,6 +109,7 @@ delete_inode: no
clear_inode: no
put_super: yes yes maybe (see below)
write_super: no yes maybe (see below)
sync_fs: no no maybe (see below)
statfs: no no no
remount_fs: yes yes maybe (see below)
umount_begin: yes no maybe (see below)
......
......@@ -221,6 +221,9 @@ int fsync_super(struct super_block *sb)
lock_super(sb);
if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
if (sb->s_op && sb->s_op->sync_fs) {
sb->s_op->sync_fs(sb, 1);
}
unlock_super(sb);
sync_blockdev(sb->s_bdev);
sync_inodes_sb(sb, 1);
......@@ -251,10 +254,12 @@ int fsync_bdev(struct block_device *bdev)
asmlinkage long sys_sync(void)
{
wakeup_bdflush(0);
sync_inodes(0); /* All mappings and inodes, including block devices */
sync_inodes(0); /* All mappings, inodes and their blockdevs */
DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */
sync_inodes(1); /* All the mappings and inodes, again. */
sync_supers(); /* Write the superblocks */
sync_filesystems(0); /* Start syncing the filesystems */
sync_filesystems(1); /* Waitingly sync the filesystems */
sync_inodes(1); /* Mappings, inodes and blockdevs, again. */
return 0;
}
......
......@@ -189,6 +189,8 @@ void generic_shutdown_super(struct super_block *sb)
if (sop) {
if (sop->write_super && sb->s_dirt)
sop->write_super(sb);
if (sop->sync_fs)
sop->sync_fs(sb, 1);
if (sop->put_super)
sop->put_super(sb);
}
......@@ -266,8 +268,8 @@ void drop_super(struct super_block *sb)
static inline void write_super(struct super_block *sb)
{
lock_super(sb);
if (sb->s_root && sb->s_dirt)
if (sb->s_op && sb->s_op->write_super)
if (sb->s_op && sb->s_root && sb->s_dirt)
if (sb->s_op->write_super)
sb->s_op->write_super(sb);
unlock_super(sb);
}
......@@ -296,6 +298,46 @@ void sync_supers(void)
spin_unlock(&sb_lock);
}
/*
 * Call the ->sync_fs super_op against all filesystems which are r/w and
 * which implement it.
 *
 * @wait is handed through unchanged to every ->sync_fs() call.
 *
 * The work is done in two passes over super_blocks:
 *
 *  1. With sb_lock held, flag every eligible superblock by setting
 *     s_need_sync_fs.
 *  2. Repeatedly scan for a flagged superblock, clear its flag, pin it
 *     with s_count, drop sb_lock and call ->sync_fs() with s_umount held
 *     for read.  Because sb_lock was dropped, the scan restarts from the
 *     head of the list after each call; the cleared flag guarantees
 *     forward progress.
 *
 * The return value of ->sync_fs() is ignored.
 */
void sync_filesystems(int wait)
{
	struct super_block *sb;

	/* Pass 1: mark the superblocks which want a ->sync_fs() call. */
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_op)
			continue;
		/*
		 * No trailing ';' on this test: with one, the `continue'
		 * below runs unconditionally and nothing is ever marked.
		 */
		if (!sb->s_op->sync_fs)
			continue;
		if (sb->s_flags & MS_RDONLY)
			continue;
		sb->s_need_sync_fs = 1;
	}
	spin_unlock(&sb_lock);

	/* Pass 2: service the marked superblocks one at a time. */
restart:
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_need_sync_fs)
			continue;
		sb->s_need_sync_fs = 0;
		if (sb->s_flags & MS_RDONLY)
			continue;	/* hm.  Was remounted r/o meanwhile */
		sb->s_count++;		/* pin sb across the unlocked section */
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		/*
		 * Same s_root guard that write_super() applies before calling
		 * into the filesystem.  NOTE(review): presumably a NULL s_root
		 * means the superblock was shut down while we slept on
		 * s_umount -- confirm against generic_shutdown_super().
		 */
		if (sb->s_root)
			sb->s_op->sync_fs(sb, wait);
		/*
		 * NOTE(review): drop_super() is expected to release both
		 * s_umount and the s_count reference taken above -- confirm
		 * against its definition.
		 */
		drop_super(sb);
		goto restart;
	}
	spin_unlock(&sb_lock);
}
/**
* get_super - get the superblock of a device
* @dev: device to get the superblock for
......
......@@ -631,6 +631,7 @@ struct super_block {
struct semaphore s_lock;
int s_count;
int s_syncing;
int s_need_sync_fs;
atomic_t s_active;
void *s_security;
......@@ -810,6 +811,7 @@ struct super_operations {
void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
void (*write_super_lockfs) (struct super_block *);
void (*unlockfs) (struct super_block *);
int (*statfs) (struct super_block *, struct statfs *);
......@@ -1143,6 +1145,7 @@ extern void write_inode_now(struct inode *, int);
extern int filemap_fdatawrite(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(void);
extern void sync_filesystems(int wait);
extern sector_t bmap(struct inode *, sector_t);
extern int setattr_mask(unsigned int);
extern int notify_change(struct dentry *, struct iattr *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment