Commit b6c3a594 authored by Linus Torvalds

Merge tag 'lazytime_fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull lazytime fixes from Ted Ts'o:
 "This fixes a problem in the lazy time patches, which can cause
  frequently updated inodes to never have their timestamps updated.

  These changes guarantee that no timestamp on disk will be stale by
  more than 24 hours"

* tag 'lazytime_fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs: add dirtytime_expire_seconds sysctl
  fs: make sure the timestamps for lazytime inodes eventually get written
parents 1e848913 1efff914
...@@ -53,6 +53,18 @@ struct wb_writeback_work { ...@@ -53,6 +53,18 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */ struct completion *done; /* set if the caller waits */
}; };
/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;
/** /**
* writeback_in_progress - determine whether there is writeback in progress * writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure. * @bdi: the device's backing_dev_info structure.
...@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, ...@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
if ((flags & EXPIRE_DIRTY_ATIME) == 0) if ((flags & EXPIRE_DIRTY_ATIME) == 0)
older_than_this = work->older_than_this; older_than_this = work->older_than_this;
else if ((work->reason == WB_REASON_SYNC) == 0) { else if (!work->for_sync) {
expire_time = jiffies - (HZ * 86400); expire_time = jiffies - (dirtytime_expire_interval * HZ);
older_than_this = &expire_time; older_than_this = &expire_time;
} }
while (!list_empty(delaying_queue)) { while (!list_empty(delaying_queue)) {
...@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
*/ */
redirty_tail(inode, wb); redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) { } else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, &wb->b_dirty_time); list_move(&inode->i_wb_list, &wb->b_dirty_time);
} else { } else {
/* The inode is clean. Remove from writeback lists. */ /* The inode is clean. Remove from writeback lists. */
...@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY; dirty = inode->i_state & I_DIRTY;
if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && if (inode->i_state & I_DIRTY_TIME) {
(inode->i_state & I_DIRTY_TIME)) || if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
(inode->i_state & I_DIRTY_TIME_EXPIRED)) { unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
unlikely(time_after(jiffies,
(inode->dirtied_time_when +
dirtytime_expire_interval * HZ)))) {
dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
trace_writeback_lazytime(inode); trace_writeback_lazytime(inode);
} }
} else
inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
inode->i_state &= ~dirty; inode->i_state &= ~dirty;
/* /*
...@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) ...@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
rcu_read_unlock(); rcu_read_unlock();
} }
/*
 * Wake up bdi's periodically to make sure dirtytime inodes get
 * written back periodically.  We deliberately do *not* check the
 * b_dirty_time list in wb_has_dirty_io(), since this would cause the
 * kernel to be constantly waking up once there are any dirtytime
 * inodes on the system.  So instead we define a separate delayed work
 * function which gets called much more rarely.  (By default, only
 * once every 12 hours.)
 *
 * If there is any other write activity going on in the file system,
 * this function won't be necessary.  But if the only thing that has
 * happened on the file system is a dirtytime inode caused by an atime
 * update, we need this infrastructure below to make sure that inode
 * eventually gets pushed out to disk.
 */
static void wakeup_dirtytime_writeback(struct work_struct *w);
static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);

/*
 * Delayed-work callback: wake the flusher thread of every bdi that has
 * at least one inode on its b_dirty_time list, then re-arm the work for
 * another dirtytime_expire_interval seconds.
 *
 * @w: the work item being run (unused).
 */
static void wakeup_dirtytime_writeback(struct work_struct *w)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();
	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
		if (list_empty(&bdi->wb.b_dirty_time))
			continue;
		bdi_wakeup_thread(bdi);
	}
	rcu_read_unlock();

	/*
	 * The sysctl allows the interval to be set to 0.  Re-arming with a
	 * zero delay would make this work requeue itself back-to-back and
	 * keep a kworker spinning, so treat 0 as "no periodic wakeup" and
	 * simply do not re-arm.  Writing a non-zero value to the sysctl
	 * queues the work again.
	 */
	if (dirtytime_expire_interval)
		schedule_delayed_work(&dirtytime_work,
				      dirtytime_expire_interval * HZ);
}
/*
 * Boot-time hook: queue the first run of dirtytime_work one
 * dirtytime_expire_interval (converted from seconds to jiffies) from now.
 * Always returns 0.
 */
static int __init start_dirtytime_writeback(void)
{
	unsigned long first_delay = dirtytime_expire_interval * HZ;

	schedule_delayed_work(&dirtytime_work, first_delay);

	return 0;
}
__initcall(start_dirtytime_writeback);
/*
 * sysctl handler for dirtytime_expire_interval.
 *
 * Parses the value with proc_dointvec_minmax().  After a successful
 * write the delayed work is adjusted to match the new interval:
 *  - non-zero: run dirtytime_work right away;
 *  - zero: cancel dirtytime_work, because kicking it with a zero
 *    interval would make it requeue itself with no delay and busy-loop.
 *
 * Returns 0 on success or the error returned by proc_dointvec_minmax().
 */
int dirtytime_interval_handler(struct ctl_table *table, int write,
			       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	if (dirtytime_expire_interval)
		mod_delayed_work(system_wq, &dirtytime_work, 0);
	else
		cancel_delayed_work_sync(&dirtytime_work);

	return 0;
}
static noinline void block_dump___mark_inode_dirty(struct inode *inode) static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{ {
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
...@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) ...@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
} }
inode->dirtied_when = jiffies; inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, dirtytime ? if (dirtytime)
&bdi->wb.b_dirty_time : &bdi->wb.b_dirty); inode->dirtied_time_when = jiffies;
if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
else
list_move(&inode->i_wb_list,
&bdi->wb.b_dirty_time);
spin_unlock(&bdi->wb.list_lock); spin_unlock(&bdi->wb.list_lock);
trace_writeback_dirty_inode_enqueue(inode); trace_writeback_dirty_inode_enqueue(inode);
......
...@@ -604,6 +604,7 @@ struct inode { ...@@ -604,6 +604,7 @@ struct inode {
struct mutex i_mutex; struct mutex i_mutex;
unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned long dirtied_time_when;
struct hlist_node i_hash; struct hlist_node i_hash;
struct list_head i_wb_list; /* backing dev IO list */ struct list_head i_wb_list; /* backing dev IO list */
......
...@@ -130,6 +130,7 @@ extern int vm_dirty_ratio; ...@@ -130,6 +130,7 @@ extern int vm_dirty_ratio;
extern unsigned long vm_dirty_bytes; extern unsigned long vm_dirty_bytes;
extern unsigned int dirty_writeback_interval; extern unsigned int dirty_writeback_interval;
extern unsigned int dirty_expire_interval; extern unsigned int dirty_expire_interval;
extern unsigned int dirtytime_expire_interval;
extern int vm_highmem_is_dirtyable; extern int vm_highmem_is_dirtyable;
extern int block_dump; extern int block_dump;
extern int laptop_mode; extern int laptop_mode;
...@@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write, ...@@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write,
extern int dirty_bytes_handler(struct ctl_table *table, int write, extern int dirty_bytes_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, void __user *buffer, size_t *lenp,
loff_t *ppos); loff_t *ppos);
/* proc handler used by the "dirtytime_expire_seconds" sysctl table entry. */
int dirtytime_interval_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
struct ctl_table; struct ctl_table;
int dirty_writeback_centisecs_handler(struct ctl_table *, int, int dirty_writeback_centisecs_handler(struct ctl_table *, int,
......
...@@ -1227,6 +1227,14 @@ static struct ctl_table vm_table[] = { ...@@ -1227,6 +1227,14 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
.extra1 = &zero, .extra1 = &zero,
}, },
{
	/* Interval, in seconds, exposed as dirtytime_expire_seconds. */
	.procname	= "dirtytime_expire_seconds",
	.data		= &dirtytime_expire_interval,
	/* Size must describe the variable .data points at. */
	.maxlen		= sizeof(dirtytime_expire_interval),
	.mode		= 0644,
	.proc_handler	= dirtytime_interval_handler,
	.extra1		= &zero,
},
{ {
.procname = "nr_pdflush_threads", .procname = "nr_pdflush_threads",
.mode = 0444 /* read-only */, .mode = 0444 /* read-only */,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment