Commit dcc25ae7 authored by Tejun Heo, committed by Jens Axboe

writeback: move global_dirty_limit into wb_domain

This patch is part of the series that defines wb_domain, which
represents a domain to which wb's (bdi_writeback's) belong and within
which they are measured against each other.  This will enable IO
backpressure propagation for cgroup writeback.

global_dirty_limit exists to regulate the global dirty threshold which
is a property of the wb_domain.  This patch moves global_dirty_limit,
dirty_lock, and update_time into wb_domain as dirty_limit, lock, and
dirty_limit_tstamp respectively.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 380c27ca
...@@ -887,7 +887,7 @@ static long writeback_chunk_size(struct bdi_writeback *wb, ...@@ -887,7 +887,7 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
pages = LONG_MAX; pages = LONG_MAX;
else { else {
pages = min(wb->avg_write_bandwidth / 2, pages = min(wb->avg_write_bandwidth / 2,
global_dirty_limit / DIRTY_SCOPE); global_wb_domain.dirty_limit / DIRTY_SCOPE);
pages = min(pages, work->nr_pages); pages = min(pages, work->nr_pages);
pages = round_down(pages + MIN_WRITEBACK_PAGES, pages = round_down(pages + MIN_WRITEBACK_PAGES,
MIN_WRITEBACK_PAGES); MIN_WRITEBACK_PAGES);
......
...@@ -95,6 +95,8 @@ struct writeback_control { ...@@ -95,6 +95,8 @@ struct writeback_control {
* dirtyable memory accordingly. * dirtyable memory accordingly.
*/ */
struct wb_domain { struct wb_domain {
spinlock_t lock;
/* /*
* Scale the writeback cache size proportional to the relative * Scale the writeback cache size proportional to the relative
* writeout speed. * writeout speed.
...@@ -115,6 +117,19 @@ struct wb_domain { ...@@ -115,6 +117,19 @@ struct wb_domain {
struct fprop_global completions; struct fprop_global completions;
struct timer_list period_timer; /* timer for aging of completions */ struct timer_list period_timer; /* timer for aging of completions */
unsigned long period_time; unsigned long period_time;
/*
* The dirtyable memory and dirty threshold could be suddenly
* knocked down by a large amount (eg. on the startup of KVM in a
* swapless system). This may throw the system into deep dirty
* exceeded state and throttle heavy/light dirtiers alike. To
* retain good responsiveness, maintain global_dirty_limit for
* tracking slowly down to the knocked down dirty threshold.
*
* Both fields are protected by ->lock.
*/
unsigned long dirty_limit_tstamp;
unsigned long dirty_limit;
}; };
/* /*
...@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask); ...@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask);
bool zone_dirty_ok(struct zone *zone); bool zone_dirty_ok(struct zone *zone);
int wb_domain_init(struct wb_domain *dom, gfp_t gfp); int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
extern unsigned long global_dirty_limit; extern struct wb_domain global_wb_domain;
/* These are exported to sysctl. */ /* These are exported to sysctl. */
extern int dirty_background_ratio; extern int dirty_background_ratio;
......
...@@ -361,7 +361,7 @@ TRACE_EVENT(global_dirty_state, ...@@ -361,7 +361,7 @@ TRACE_EVENT(global_dirty_state,
__entry->nr_written = global_page_state(NR_WRITTEN); __entry->nr_written = global_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh; __entry->background_thresh = background_thresh;
__entry->dirty_thresh = dirty_thresh; __entry->dirty_thresh = dirty_thresh;
__entry->dirty_limit = global_dirty_limit; __entry->dirty_limit = global_wb_domain.dirty_limit;
), ),
TP_printk("dirty=%lu writeback=%lu unstable=%lu " TP_printk("dirty=%lu writeback=%lu unstable=%lu "
...@@ -463,8 +463,9 @@ TRACE_EVENT(balance_dirty_pages, ...@@ -463,8 +463,9 @@ TRACE_EVENT(balance_dirty_pages,
unsigned long freerun = (thresh + bg_thresh) / 2; unsigned long freerun = (thresh + bg_thresh) / 2;
strlcpy(__entry->bdi, dev_name(bdi->dev), 32); strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
__entry->limit = global_dirty_limit; __entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_dirty_limit + freerun) / 2; __entry->setpoint = (global_wb_domain.dirty_limit +
freerun) / 2;
__entry->dirty = dirty; __entry->dirty = dirty;
__entry->bdi_setpoint = __entry->setpoint * __entry->bdi_setpoint = __entry->setpoint *
bdi_thresh / (thresh + 1); bdi_thresh / (thresh + 1);
......
...@@ -122,9 +122,7 @@ EXPORT_SYMBOL(laptop_mode); ...@@ -122,9 +122,7 @@ EXPORT_SYMBOL(laptop_mode);
/* End of sysctl-exported parameters */ /* End of sysctl-exported parameters */
unsigned long global_dirty_limit; struct wb_domain global_wb_domain;
static struct wb_domain global_wb_domain;
/* /*
* Length of period for aging writeout fractions of bdis. This is an * Length of period for aging writeout fractions of bdis. This is an
...@@ -470,9 +468,15 @@ static void writeout_period(unsigned long t) ...@@ -470,9 +468,15 @@ static void writeout_period(unsigned long t)
int wb_domain_init(struct wb_domain *dom, gfp_t gfp) int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{ {
memset(dom, 0, sizeof(*dom)); memset(dom, 0, sizeof(*dom));
spin_lock_init(&dom->lock);
init_timer_deferrable(&dom->period_timer); init_timer_deferrable(&dom->period_timer);
dom->period_timer.function = writeout_period; dom->period_timer.function = writeout_period;
dom->period_timer.data = (unsigned long)dom; dom->period_timer.data = (unsigned long)dom;
dom->dirty_limit_tstamp = jiffies;
return fprop_global_init(&dom->completions, gfp); return fprop_global_init(&dom->completions, gfp);
} }
...@@ -532,7 +536,9 @@ static unsigned long dirty_freerun_ceiling(unsigned long thresh, ...@@ -532,7 +536,9 @@ static unsigned long dirty_freerun_ceiling(unsigned long thresh,
static unsigned long hard_dirty_limit(unsigned long thresh) static unsigned long hard_dirty_limit(unsigned long thresh)
{ {
return max(thresh, global_dirty_limit); struct wb_domain *dom = &global_wb_domain;
return max(thresh, dom->dirty_limit);
} }
/** /**
...@@ -916,17 +922,10 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, ...@@ -916,17 +922,10 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
wb->avg_write_bandwidth = avg; wb->avg_write_bandwidth = avg;
} }
/*
* The global dirtyable memory and dirty threshold could be suddenly knocked
* down by a large amount (eg. on the startup of KVM in a swapless system).
* This may throw the system into deep dirty exceeded state and throttle
* heavy/light dirtiers alike. To retain good responsiveness, maintain
* global_dirty_limit for tracking slowly down to the knocked down dirty
* threshold.
*/
static void update_dirty_limit(unsigned long thresh, unsigned long dirty) static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
{ {
unsigned long limit = global_dirty_limit; struct wb_domain *dom = &global_wb_domain;
unsigned long limit = dom->dirty_limit;
/* /*
* Follow up in one step. * Follow up in one step.
...@@ -939,7 +938,7 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty) ...@@ -939,7 +938,7 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
/* /*
* Follow down slowly. Use the higher one as the target, because thresh * Follow down slowly. Use the higher one as the target, because thresh
* may drop below dirty. This is exactly the reason to introduce * may drop below dirty. This is exactly the reason to introduce
* global_dirty_limit which is guaranteed to lie above the dirty pages. * dom->dirty_limit which is guaranteed to lie above the dirty pages.
*/ */
thresh = max(thresh, dirty); thresh = max(thresh, dirty);
if (limit > thresh) { if (limit > thresh) {
...@@ -948,28 +947,27 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty) ...@@ -948,28 +947,27 @@ static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
} }
return; return;
update: update:
global_dirty_limit = limit; dom->dirty_limit = limit;
} }
static void global_update_bandwidth(unsigned long thresh, static void global_update_bandwidth(unsigned long thresh,
unsigned long dirty, unsigned long dirty,
unsigned long now) unsigned long now)
{ {
static DEFINE_SPINLOCK(dirty_lock); struct wb_domain *dom = &global_wb_domain;
static unsigned long update_time = INITIAL_JIFFIES;
/* /*
* check locklessly first to optimize away locking for the most time * check locklessly first to optimize away locking for the most time
*/ */
if (time_before(now, update_time + BANDWIDTH_INTERVAL)) if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
return; return;
spin_lock(&dirty_lock); spin_lock(&dom->lock);
if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) { if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
update_dirty_limit(thresh, dirty); update_dirty_limit(thresh, dirty);
update_time = now; dom->dirty_limit_tstamp = now;
} }
spin_unlock(&dirty_lock); spin_unlock(&dom->lock);
} }
/* /*
...@@ -1761,10 +1759,12 @@ void laptop_sync_completion(void) ...@@ -1761,10 +1759,12 @@ void laptop_sync_completion(void)
void writeback_set_ratelimit(void) void writeback_set_ratelimit(void)
{ {
struct wb_domain *dom = &global_wb_domain;
unsigned long background_thresh; unsigned long background_thresh;
unsigned long dirty_thresh; unsigned long dirty_thresh;
global_dirty_limits(&background_thresh, &dirty_thresh); global_dirty_limits(&background_thresh, &dirty_thresh);
global_dirty_limit = dirty_thresh; dom->dirty_limit = dirty_thresh;
ratelimit_pages = dirty_thresh / (num_online_cpus() * 32); ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
if (ratelimit_pages < 16) if (ratelimit_pages < 16)
ratelimit_pages = 16; ratelimit_pages = 16;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment