Commit 18ccf569 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] inode dirtying timestamp fix

It's a bit late in 2.6.5-rc for this, but it does fix a significant bug: for
the first five minutes after boot, prior to the jiffy wrap, /bin/sync doesn't
write all of the dirty data.

We currently record an inode's time-of-first-dirtying in the address_space.
This causes a few problems with dirty block special inodes: when the device
node for /dev/hda1 needs to be written back we bogusly inspect the timestamp
for the address_space, which belongs to a different inode.

So move the inode's dirtying time up into the inode itself.

This means that for block-special inodes, inode->dirtied_when represents the
time at which the inode itself was dirtied.

For regular files it represents the time at which the inode or its pages were
dirtied.

For blockdevs, the time-of-first-dirtying is recorded in the kernel-internal
blockdev inode.  Only I_DIRTY_PAGES makes sense on these inodes.

The reason all this works is that when dirtying a page we always run

	__mark_inode_dirty(page->mapping->host);

which refers to the kernel-internal blockdev inode.
parent 2684a91b
......@@ -44,6 +44,13 @@ extern struct super_block *blockdev_superblock;
*
* This function *must* be atomic for the I_DIRTY_PAGES case -
* set_page_dirty() is called under spinlock in several places.
*
* Note that for blockdevs, inode->dirtied_when represents the dirtying time of
* the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
* the kernel-internal blockdev inode represents the dirtying time of the
* blockdev's pages. This is why for I_DIRTY_PAGES we always use
* page->mapping->host, so the page-dirtying time is recorded in the internal
* blockdev inode.
*/
void __mark_inode_dirty(struct inode *inode, int flags)
{
......@@ -71,7 +78,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
spin_lock(&inode_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
struct address_space *mapping = inode->i_mapping;
inode->i_state |= flags;
......@@ -99,7 +105,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* reposition it (that would break s_dirty time-ordering).
*/
if (!was_dirty) {
mapping->dirtied_when = jiffies;
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
}
}
......@@ -176,11 +182,11 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
} else if (!list_empty(&mapping->dirty_pages)) {
/* Redirtied */
inode->i_state |= I_DIRTY_PAGES;
mapping->dirtied_when = jiffies;
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
} else if (inode->i_state & I_DIRTY) {
/* Redirtied */
mapping->dirtied_when = jiffies;
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
} else if (atomic_read(&inode->i_count)) {
list_move(&inode->i_list, &inode_in_use);
......@@ -220,7 +226,7 @@ __writeback_single_inode(struct inode *inode,
* Write out a superblock's list of dirty inodes. A wait will be performed
* upon no inodes, all inodes or the final one, depending upon sync_mode.
*
* If older_than_this is non-NULL, then only write out mappings which
* If older_than_this is non-NULL, then only write out inodes which
* had their first dirtying at a time earlier than *older_than_this.
*
* If we're a pdlfush thread, then implement pdflush collision avoidance
......@@ -292,11 +298,11 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
}
/* Was this inode dirtied after sync_sb_inodes was called? */
if (time_after(mapping->dirtied_when, start))
if (time_after(inode->dirtied_when, start))
break;
/* Was this inode dirtied too recently? */
if (wbc->older_than_this && time_after(mapping->dirtied_when,
if (wbc->older_than_this && time_after(inode->dirtied_when,
*wbc->older_than_this))
break;
......@@ -308,7 +314,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
__iget(inode);
__writeback_single_inode(inode, wbc);
if (wbc->sync_mode == WB_SYNC_HOLD) {
mapping->dirtied_when = jiffies;
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
}
if (current_is_pdflush())
......
......@@ -132,6 +132,7 @@ static struct inode *alloc_inode(struct super_block *sb)
inode->i_cdev = NULL;
inode->i_rdev = 0;
inode->i_security = NULL;
inode->dirtied_when = 0;
if (security_inode_alloc(inode)) {
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
......@@ -144,7 +145,6 @@ static struct inode *alloc_inode(struct super_block *sb)
mapping->host = inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
mapping->dirtied_when = 0;
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
if (sb->s_bdev)
......
......@@ -333,7 +333,6 @@ struct address_space {
struct list_head i_mmap_shared; /* list of shared mappings */
struct semaphore i_shared_sem; /* protect both above lists */
atomic_t truncate_count; /* Cover race condition with truncate */
unsigned long dirtied_when; /* jiffies of first page dirtying */
unsigned long flags; /* error bits/gfp mask */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
spinlock_t private_lock; /* for use by the address_space */
......@@ -416,6 +415,7 @@ struct inode {
struct dnotify_struct *i_dnotify; /* for directory notifications */
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned int i_flags;
unsigned char i_sock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment