Commit edcb0722 authored by Tejun Heo

blkcg: introduce blkg_stat and blkg_rwstat

blkcg uses u64_stats_sync to avoid reading wrong u64 statistic values
on 32bit archs and some stat counters have subtypes to distinguish
read/writes and sync/async IOs.  The stat code paths are confusing and
involve a lot of going back and forth between blkcg core and specific
policy implementations, and synchronization and subtype handling are
open coded in blkcg core.

This patch introduces struct blkg_stat and blkg_rwstat which, with
accompanying operations, encapsulate stat updating and accessing with
proper synchronization.

blkg_stat is a simple u64 counter with 64bit read-access protection.
blkg_rwstat is the one with rw and [a]sync subcounters and takes @rw
flags to distinguish IO subtypes (%REQ_WRITE and %REQ_SYNC) and
replaces stat_sub_type indexed arrays.

All counters in blkio_group_stats and blkio_group_stats_cpu are
replaced with either blkg_stat or blkg_rwstat along with all users.

This does add one u64_stats_sync per counter and increases the number of
stats_sync operations, but they're empty/noops on 64bit archs and blkcg
doesn't have too many counters, especially with DEBUG_BLK_CGROUP off.

While the resulting code isn't necessarily simpler at the moment, this
will enable further clean up of blkcg stats code.

- BLKIO_STAT_{READ|WRITE|SYNC|ASYNC|TOTAL} renamed to
  BLKG_RWSTAT_{READ|WRITE|SYNC|ASYNC|TOTAL}.

- blkg_stat_add() replaces blkio_add_stat() and
  blkio_check_and_dec_stat().  Note that BUG_ON() on underflow in the
  latter function no longer exists.  It's *way* better to have
  underflowed stat counters than oopsing.

- blkio_group_stats->dequeue is now a proper u64 stat counter instead
  of ulong.

- reset_stats() updated to clear each stat counter individually and
  BLKG_STATS_DEBUG_CLEAR_{START|SIZE} are removed.

- Some functions reconstruct rw flags from direction and sync
  booleans.  This will be removed by future patches.
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 2aa4a152
...@@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, ...@@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg,
} }
} }
/*
 * Account @add into a sub-type indexed stat array.
 *
 * Two slots are bumped per call: the write slot when @direction is true
 * (the read slot otherwise), and the sync slot when @sync is true (the
 * async slot otherwise).
 * This should be called with queue_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
			   bool sync)
{
	stat[direction ? BLKIO_STAT_WRITE : BLKIO_STAT_READ] += add;
	stat[sync ? BLKIO_STAT_SYNC : BLKIO_STAT_ASYNC] += add;
}
/*
 * Drop one from the stat slots matching the request type.
 *
 * The direction slot (write when @direction is true, read otherwise) is
 * checked and decremented first, then the sync/async slot selected by
 * @sync.  Each slot is passed through BUG_ON() before the decrement, so a
 * slot that is already zero triggers BUG_ON() instead of wrapping around.
 * This should be called with the queue_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	const int dir_idx = direction ? BLKIO_STAT_WRITE : BLKIO_STAT_READ;
	const int sync_idx = sync ? BLKIO_STAT_SYNC : BLKIO_STAT_ASYNC;

	BUG_ON(stat[dir_idx] == 0);
	stat[dir_idx]--;

	BUG_ON(stat[sync_idx] == 0);
	stat[sync_idx]--;
}
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the queue_lock held. */ /* This should be called with the queue_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg, static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
...@@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats) ...@@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
now = sched_clock(); now = sched_clock();
if (time_after64(now, stats->start_group_wait_time)) if (time_after64(now, stats->start_group_wait_time))
stats->group_wait_time += now - stats->start_group_wait_time; blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
blkio_clear_blkg_waiting(stats); blkio_clear_blkg_waiting(stats);
} }
...@@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) ...@@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats)
now = sched_clock(); now = sched_clock();
if (time_after64(now, stats->start_empty_time)) if (time_after64(now, stats->start_empty_time))
stats->empty_time += now - stats->start_empty_time; blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
blkio_clear_blkg_empty(stats); blkio_clear_blkg_empty(stats);
} }
...@@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg, ...@@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
if (blkio_blkg_idling(stats)) { if (blkio_blkg_idling(stats)) {
unsigned long long now = sched_clock(); unsigned long long now = sched_clock();
if (time_after64(now, stats->start_idle_time)) { if (time_after64(now, stats->start_idle_time))
u64_stats_update_begin(&stats->syncp); blkg_stat_add(&stats->idle_time,
stats->idle_time += now - stats->start_idle_time; now - stats->start_idle_time);
u64_stats_update_end(&stats->syncp);
}
blkio_clear_blkg_idling(stats); blkio_clear_blkg_idling(stats);
} }
} }
...@@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, ...@@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp); blkg_stat_add(&stats->avg_queue_size_sum,
stats->avg_queue_size_sum += blkg_rwstat_sum(&stats->queued));
stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + blkg_stat_add(&stats->avg_queue_size_samples, 1);
stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
stats->avg_queue_size_samples++;
blkio_update_group_wait_time(stats); blkio_update_group_wait_time(stats);
u64_stats_update_end(&stats->syncp);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
...@@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg, ...@@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg,
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || if (blkg_rwstat_sum(&stats->queued))
stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE])
return; return;
/* /*
...@@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg, ...@@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
pd->stats.dequeue += dequeue; blkg_stat_add(&pd->stats.dequeue, dequeue);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else #else
...@@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg, ...@@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg,
bool sync) bool sync)
{ {
struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp); blkg_rwstat_add(&stats->queued, rw, 1);
blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync);
blkio_end_empty_time(stats); blkio_end_empty_time(stats);
u64_stats_update_end(&stats->syncp);
blkio_set_start_group_wait_time(blkg, pol, curr_blkg); blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
...@@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, ...@@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
bool direction, bool sync) bool direction, bool sync)
{ {
struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp); blkg_rwstat_add(&stats->queued, rw, -1);
blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction,
sync);
u64_stats_update_end(&stats->syncp);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
...@@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, ...@@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg,
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp); blkg_stat_add(&stats->time, time);
stats->time += time;
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
stats->unaccounted_time += unaccounted_time; blkg_stat_add(&stats->unaccounted_time, unaccounted_time);
#endif #endif
u64_stats_update_end(&stats->syncp);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
...@@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, ...@@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
struct blkio_policy_type *pol, struct blkio_policy_type *pol,
uint64_t bytes, bool direction, bool sync) uint64_t bytes, bool direction, bool sync)
{ {
int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkg_policy_data *pd = blkg->pd[pol->plid];
struct blkio_group_stats_cpu *stats_cpu; struct blkio_group_stats_cpu *stats_cpu;
unsigned long flags; unsigned long flags;
...@@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, ...@@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
stats_cpu = this_cpu_ptr(pd->stats_cpu); stats_cpu = this_cpu_ptr(pd->stats_cpu);
u64_stats_update_begin(&stats_cpu->syncp); blkg_stat_add(&stats_cpu->sectors, bytes >> 9);
stats_cpu->sectors += bytes >> 9; blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED], blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);
1, direction, sync);
blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
bytes, direction, sync);
u64_stats_update_end(&stats_cpu->syncp);
local_irq_restore(flags); local_irq_restore(flags);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
...@@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg, ...@@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
{ {
struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
unsigned long long now = sched_clock(); unsigned long long now = sched_clock();
int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp);
if (time_after64(now, io_start_time)) if (time_after64(now, io_start_time))
blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
now - io_start_time, direction, sync);
if (time_after64(io_start_time, start_time)) if (time_after64(io_start_time, start_time))
blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], blkg_rwstat_add(&stats->wait_time, rw,
io_start_time - start_time, direction, sync); io_start_time - start_time);
u64_stats_update_end(&stats->syncp);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
...@@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, ...@@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
bool direction, bool sync) bool direction, bool sync)
{ {
struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(blkg->q->queue_lock);
u64_stats_update_begin(&stats->syncp); blkg_rwstat_add(&stats->merged, rw, 1);
blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync);
u64_stats_update_end(&stats->syncp);
} }
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
...@@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) ...@@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
struct blkio_group_stats_cpu *sc = struct blkio_group_stats_cpu *sc =
per_cpu_ptr(pd->stats_cpu, cpu); per_cpu_ptr(pd->stats_cpu, cpu);
sc->sectors = 0; blkg_rwstat_reset(&sc->service_bytes);
memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu)); blkg_rwstat_reset(&sc->serviced);
blkg_stat_reset(&sc->sectors);
} }
} }
...@@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) ...@@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
struct blkio_group *blkg; struct blkio_group *blkg;
struct hlist_node *n; struct hlist_node *n;
int i;
spin_lock(&blkio_list_lock); spin_lock(&blkio_list_lock);
spin_lock_irq(&blkcg->lock); spin_lock_irq(&blkcg->lock);
...@@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) ...@@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
struct blkio_group_stats *stats = &pd->stats; struct blkio_group_stats *stats = &pd->stats;
/* queued stats shouldn't be cleared */ /* queued stats shouldn't be cleared */
for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++) blkg_rwstat_reset(&stats->merged);
if (i != BLKIO_STAT_QUEUED) blkg_rwstat_reset(&stats->service_time);
memset(stats->stat_arr[i], 0, blkg_rwstat_reset(&stats->wait_time);
sizeof(stats->stat_arr[i])); blkg_stat_reset(&stats->time);
stats->time = 0;
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0, blkg_stat_reset(&stats->unaccounted_time);
BLKG_STATS_DEBUG_CLEAR_SIZE); blkg_stat_reset(&stats->avg_queue_size_sum);
blkg_stat_reset(&stats->avg_queue_size_samples);
blkg_stat_reset(&stats->dequeue);
blkg_stat_reset(&stats->group_wait_time);
blkg_stat_reset(&stats->idle_time);
blkg_stat_reset(&stats->empty_time);
#endif #endif
blkio_reset_stats_cpu(blkg, pol->plid); blkio_reset_stats_cpu(blkg, pol->plid);
} }
...@@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) ...@@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
return 0; return 0;
} }
static void blkio_get_key_name(enum stat_sub_type type, const char *dname, static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname,
char *str, int chars_left, bool diskname_only) char *str, int chars_left, bool diskname_only)
{ {
snprintf(str, chars_left, "%s", dname); snprintf(str, chars_left, "%s", dname);
...@@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, ...@@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
if (diskname_only) if (diskname_only)
return; return;
switch (type) { switch (type) {
case BLKIO_STAT_READ: case BLKG_RWSTAT_READ:
strlcat(str, " Read", chars_left); strlcat(str, " Read", chars_left);
break; break;
case BLKIO_STAT_WRITE: case BLKG_RWSTAT_WRITE:
strlcat(str, " Write", chars_left); strlcat(str, " Write", chars_left);
break; break;
case BLKIO_STAT_SYNC: case BLKG_RWSTAT_SYNC:
strlcat(str, " Sync", chars_left); strlcat(str, " Sync", chars_left);
break; break;
case BLKIO_STAT_ASYNC: case BLKG_RWSTAT_ASYNC:
strlcat(str, " Async", chars_left); strlcat(str, " Async", chars_left);
break; break;
case BLKIO_STAT_TOTAL: case BLKG_RWSTAT_TOTAL:
strlcat(str, " Total", chars_left); strlcat(str, " Total", chars_left);
break; break;
default: default:
...@@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, ...@@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
} }
static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
enum stat_type_cpu type, enum stat_sub_type sub_type) enum stat_type_cpu type,
enum blkg_rwstat_type sub_type)
{ {
struct blkg_policy_data *pd = blkg->pd[plid]; struct blkg_policy_data *pd = blkg->pd[plid];
u64 val = 0;
int cpu; int cpu;
struct blkio_group_stats_cpu *stats_cpu;
u64 val = 0, tval;
if (pd->stats_cpu == NULL) if (pd->stats_cpu == NULL)
return val; return val;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
unsigned int start; struct blkio_group_stats_cpu *stats_cpu =
stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); per_cpu_ptr(pd->stats_cpu, cpu);
struct blkg_rwstat rws;
do {
start = u64_stats_fetch_begin(&stats_cpu->syncp);
if (type == BLKIO_STAT_CPU_SECTORS)
tval = stats_cpu->sectors;
else
tval = stats_cpu->stat_arr_cpu[type][sub_type];
} while(u64_stats_fetch_retry(&stats_cpu->syncp, start));
val += tval; switch (type) {
case BLKIO_STAT_CPU_SECTORS:
val += blkg_stat_read(&stats_cpu->sectors);
break;
case BLKIO_STAT_CPU_SERVICE_BYTES:
rws = blkg_rwstat_read(&stats_cpu->service_bytes);
val += rws.cnt[sub_type];
break;
case BLKIO_STAT_CPU_SERVICED:
rws = blkg_rwstat_read(&stats_cpu->serviced);
val += rws.cnt[sub_type];
break;
}
} }
return val; return val;
...@@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, ...@@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
{ {
uint64_t disk_total, val; uint64_t disk_total, val;
char key_str[MAX_KEY_LEN]; char key_str[MAX_KEY_LEN];
enum stat_sub_type sub_type; enum blkg_rwstat_type sub_type;
if (type == BLKIO_STAT_CPU_SECTORS) { if (type == BLKIO_STAT_CPU_SECTORS) {
val = blkio_read_stat_cpu(blkg, plid, type, 0); val = blkio_read_stat_cpu(blkg, plid, type, 0);
...@@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, ...@@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
return val; return val;
} }
for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR;
sub_type++) { sub_type++) {
blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
false); false);
...@@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, ...@@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
cb->fill(cb, key_str, val); cb->fill(cb, key_str, val);
} }
disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) + disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) +
blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE); blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE);
blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
false); false);
cb->fill(cb, key_str, disk_total); cb->fill(cb, key_str, disk_total);
return disk_total; return disk_total;
...@@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, ...@@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
struct blkio_group_stats *stats = &blkg->pd[plid]->stats; struct blkio_group_stats *stats = &blkg->pd[plid]->stats;
uint64_t v = 0, disk_total = 0; uint64_t v = 0, disk_total = 0;
char key_str[MAX_KEY_LEN]; char key_str[MAX_KEY_LEN];
unsigned int sync_start; struct blkg_rwstat rws = { };
int st; int st;
if (type >= BLKIO_STAT_ARR_NR) { if (type >= BLKIO_STAT_ARR_NR) {
do {
sync_start = u64_stats_fetch_begin(&stats->syncp);
switch (type) { switch (type) {
case BLKIO_STAT_TIME: case BLKIO_STAT_TIME:
v = stats->time; v = blkg_stat_read(&stats->time);
break; break;
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
case BLKIO_STAT_UNACCOUNTED_TIME: case BLKIO_STAT_UNACCOUNTED_TIME:
v = stats->unaccounted_time; v = blkg_stat_read(&stats->unaccounted_time);
break; break;
case BLKIO_STAT_AVG_QUEUE_SIZE: { case BLKIO_STAT_AVG_QUEUE_SIZE: {
uint64_t samples = stats->avg_queue_size_samples; uint64_t samples;
samples = blkg_stat_read(&stats->avg_queue_size_samples);
if (samples) { if (samples) {
v = stats->avg_queue_size_sum; v = blkg_stat_read(&stats->avg_queue_size_sum);
do_div(v, samples); do_div(v, samples);
} }
break; break;
} }
case BLKIO_STAT_IDLE_TIME: case BLKIO_STAT_IDLE_TIME:
v = stats->idle_time; v = blkg_stat_read(&stats->idle_time);
break; break;
case BLKIO_STAT_EMPTY_TIME: case BLKIO_STAT_EMPTY_TIME:
v = stats->empty_time; v = blkg_stat_read(&stats->empty_time);
break; break;
case BLKIO_STAT_DEQUEUE: case BLKIO_STAT_DEQUEUE:
v = stats->dequeue; v = blkg_stat_read(&stats->dequeue);
break; break;
case BLKIO_STAT_GROUP_WAIT_TIME: case BLKIO_STAT_GROUP_WAIT_TIME:
v = stats->group_wait_time; v = blkg_stat_read(&stats->group_wait_time);
break; break;
#endif #endif
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
} }
} while (u64_stats_fetch_retry(&stats->syncp, sync_start));
blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true);
cb->fill(cb, key_str, v); cb->fill(cb, key_str, v);
return v; return v;
} }
for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) { switch (type) {
do { case BLKIO_STAT_MERGED:
sync_start = u64_stats_fetch_begin(&stats->syncp); rws = blkg_rwstat_read(&stats->merged);
v = stats->stat_arr[type][st]; break;
} while (u64_stats_fetch_retry(&stats->syncp, sync_start)); case BLKIO_STAT_SERVICE_TIME:
rws = blkg_rwstat_read(&stats->service_time);
break;
case BLKIO_STAT_WAIT_TIME:
rws = blkg_rwstat_read(&stats->wait_time);
break;
case BLKIO_STAT_QUEUED:
rws = blkg_rwstat_read(&stats->queued);
break;
default:
WARN_ON_ONCE(true);
break;
}
for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) {
blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false);
cb->fill(cb, key_str, v); cb->fill(cb, key_str, rws.cnt[st]);
if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE) if (st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE)
disk_total += v; disk_total += rws.cnt[st];
} }
blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN,
false); false);
cb->fill(cb, key_str, disk_total); cb->fill(cb, key_str, disk_total);
return disk_total; return disk_total;
......
...@@ -69,12 +69,14 @@ enum stat_type_cpu { ...@@ -69,12 +69,14 @@ enum stat_type_cpu {
#define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1) #define BLKIO_STAT_CPU_ARR_NR (BLKIO_STAT_CPU_SERVICED + 1)
enum stat_sub_type { enum blkg_rwstat_type {
BLKIO_STAT_READ = 0, BLKG_RWSTAT_READ,
BLKIO_STAT_WRITE, BLKG_RWSTAT_WRITE,
BLKIO_STAT_SYNC, BLKG_RWSTAT_SYNC,
BLKIO_STAT_ASYNC, BLKG_RWSTAT_ASYNC,
BLKIO_STAT_TOTAL
BLKG_RWSTAT_NR,
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
}; };
/* blkg state flags */ /* blkg state flags */
...@@ -124,33 +126,42 @@ struct blkio_cgroup { ...@@ -124,33 +126,42 @@ struct blkio_cgroup {
uint64_t id; uint64_t id;
}; };
struct blkio_group_stats { struct blkg_stat {
struct u64_stats_sync syncp;
uint64_t cnt;
};
struct blkg_rwstat {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
uint64_t cnt[BLKG_RWSTAT_NR];
};
struct blkio_group_stats {
/* number of ios merged */
struct blkg_rwstat merged;
/* total time spent on device in ns, may not be accurate w/ queueing */
struct blkg_rwstat service_time;
/* total time spent waiting in scheduler queue in ns */
struct blkg_rwstat wait_time;
/* number of IOs queued up */
struct blkg_rwstat queued;
/* total disk time and nr sectors dispatched by this group */ /* total disk time and nr sectors dispatched by this group */
uint64_t time; struct blkg_stat time;
uint64_t stat_arr[BLKIO_STAT_ARR_NR][BLKIO_STAT_TOTAL];
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
/* Time not charged to this cgroup */ /* time not charged to this cgroup */
uint64_t unaccounted_time; struct blkg_stat unaccounted_time;
/* sum of number of ios queued across all samples */
/* Sum of number of IOs queued across all samples */ struct blkg_stat avg_queue_size_sum;
uint64_t avg_queue_size_sum; /* count of samples taken for average */
/* Count of samples taken for average */ struct blkg_stat avg_queue_size_samples;
uint64_t avg_queue_size_samples; /* how many times this group has been removed from service tree */
/* How many times this group has been removed from service tree */ struct blkg_stat dequeue;
unsigned long dequeue; /* total time spent waiting for it to be assigned a timeslice. */
struct blkg_stat group_wait_time;
/* Total time spent waiting for it to be assigned a timeslice. */ /* time spent idling for this blkio_group */
uint64_t group_wait_time; struct blkg_stat idle_time;
/* total time with empty current active q with other requests queued */
/* Time spent idling for this blkio_group */ struct blkg_stat empty_time;
uint64_t idle_time;
/*
* Total time when we have requests queued and do not contain the
* current active queue.
*/
uint64_t empty_time;
/* fields after this shouldn't be cleared on stat reset */ /* fields after this shouldn't be cleared on stat reset */
uint64_t start_group_wait_time; uint64_t start_group_wait_time;
uint64_t start_idle_time; uint64_t start_idle_time;
...@@ -159,19 +170,14 @@ struct blkio_group_stats { ...@@ -159,19 +170,14 @@ struct blkio_group_stats {
#endif #endif
}; };
#ifdef CONFIG_DEBUG_BLK_CGROUP
#define BLKG_STATS_DEBUG_CLEAR_START \
offsetof(struct blkio_group_stats, unaccounted_time)
#define BLKG_STATS_DEBUG_CLEAR_SIZE \
(offsetof(struct blkio_group_stats, start_group_wait_time) - \
BLKG_STATS_DEBUG_CLEAR_START)
#endif
/* Per cpu blkio group stats */ /* Per cpu blkio group stats */
struct blkio_group_stats_cpu { struct blkio_group_stats_cpu {
uint64_t sectors; /* total bytes transferred */
uint64_t stat_arr_cpu[BLKIO_STAT_CPU_ARR_NR][BLKIO_STAT_TOTAL]; struct blkg_rwstat service_bytes;
struct u64_stats_sync syncp; /* total IOs serviced, post merge */
struct blkg_rwstat serviced;
/* total sectors transferred */
struct blkg_stat sectors;
}; };
struct blkio_group_conf { struct blkio_group_conf {
...@@ -316,6 +322,121 @@ static inline void blkg_put(struct blkio_group *blkg) ...@@ -316,6 +322,121 @@ static inline void blkg_put(struct blkio_group *blkg)
__blkg_release(blkg); __blkg_release(blkg);
} }
/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Bump @stat->cnt by @val inside a u64_stats update section on
 * @stat->syncp.  Concurrent callers are not serialized here; the caller
 * is responsible for synchronizing calls to this function.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	struct u64_stats_sync *sync = &stat->syncp;

	u64_stats_update_begin(sync);
	stat->cnt = stat->cnt + val;
	u64_stats_update_end(sync);
}
/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 *
 * Return a snapshot of @stat->cnt.  The load is repeated for as long as
 * u64_stats_fetch_retry() on @stat->syncp asks for a retry, so this
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	uint64_t snapshot;
	unsigned int seq;

	do {
		seq = u64_stats_fetch_begin(&stat->syncp);
		snapshot = stat->cnt;
	} while (u64_stats_fetch_retry(&stat->syncp, seq));

	return snapshot;
}
/**
 * blkg_stat_reset - reset a blkg_stat
 * @stat: blkg_stat to reset
 *
 * Set the counter of @stat back to zero.  This is a plain store: no
 * u64_stats update section is entered and @stat->syncp is left untouched.
 */
static inline void blkg_stat_reset(struct blkg_stat *stat)
{
	stat->cnt = 0;
}
/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @rw: mask of REQ_{WRITE|SYNC}
 * @val: value to add
 *
 * Add @val to two sub-counters of @rwstat picked from @rw: WRITE when
 * %REQ_WRITE is set (READ otherwise) and SYNC when %REQ_SYNC is set
 * (ASYNC otherwise).  Both additions happen inside a single u64_stats
 * update section.  The caller is responsible for synchronizing calls to
 * this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   int rw, uint64_t val)
{
	const int dir = (rw & REQ_WRITE) ? BLKG_RWSTAT_WRITE : BLKG_RWSTAT_READ;
	const int sync = (rw & REQ_SYNC) ? BLKG_RWSTAT_SYNC : BLKG_RWSTAT_ASYNC;

	u64_stats_update_begin(&rwstat->syncp);
	rwstat->cnt[dir] += val;
	rwstat->cnt[sync] += val;
	u64_stats_update_end(&rwstat->syncp);
}
/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Read the current snapshot of @rwstat and return it as the return value.
 * The whole structure is copied by value, and the copy is repeated for as
 * long as u64_stats_fetch_retry() on @rwstat->syncp asks for a retry.
 * This function can be called without synchronization and takes care of
 * u64 atomicity.
 *
 * Declared inline like the other blkg_stat/blkg_rwstat helpers so that
 * files which include this definition without calling it do not end up
 * with an unused static function.
 */
static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
{
	unsigned int start;
	struct blkg_rwstat tmp;

	do {
		start = u64_stats_fetch_begin(&rwstat->syncp);
		tmp = *rwstat;
	} while (u64_stats_fetch_retry(&rwstat->syncp, start));

	return tmp;
}
/**
* blkg_rwstat_sum - read the total count of a blkg_rwstat
* @rwstat: blkg_rwstat to read
*
* Return the total count of @rwstat regardless of the IO direction. This
* function can be called without synchronization and takes care of u64
* atomicity.
*/
static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
{
struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
}
/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 *
 * Zero every sub-counter in @rwstat->cnt.  Only the counter array is
 * cleared; @rwstat->syncp is left untouched.
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	memset(&rwstat->cnt[0], 0, sizeof(rwstat->cnt));
}
#else #else
struct blkio_group { struct blkio_group {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment