Commit 1d7734a0, authored by Lars Ellenberg, committed by Philipp Reisner

drbd: use rolling marks for resync speed calculation

The current resync speed as displayed in /proc/drbd fluctuates a lot.
Using an array of rolling marks makes this calculation much more stable.
We used to have this (a long time ago with 0.7), but it got lost somehow.

If "stalled", do not discard the rest of the information, just add a
" (stalled)" tag to the progress line.

This patch also shortens a spinlock critical section somewhat, and
reduces the number of atomic operations in put_ldev.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 0bb70bf6
...@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, ...@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
* ok, (capacity & 7) != 0 sometimes, but who cares... * ok, (capacity & 7) != 0 sometimes, but who cares...
* we count rs_{total,left} in bits, not sectors. * we count rs_{total,left} in bits, not sectors.
*/ */
spin_lock_irqsave(&mdev->al_lock, flags);
count = drbd_bm_clear_bits(mdev, sbnr, ebnr); count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
if (count) { if (count && get_ldev(mdev)) {
/* we need the lock for drbd_try_clear_on_disk_bm */ unsigned long now = jiffies;
if (jiffies - mdev->rs_mark_time > HZ*10) { unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
/* should be rolling marks, int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
* but we estimate only anyways. */ if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && unsigned long tw = drbd_bm_total_weight(mdev);
if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
mdev->state.conn != C_PAUSED_SYNC_T && mdev->state.conn != C_PAUSED_SYNC_T &&
mdev->state.conn != C_PAUSED_SYNC_S) { mdev->state.conn != C_PAUSED_SYNC_S) {
mdev->rs_mark_time = jiffies; mdev->rs_mark_time[next] = now;
mdev->rs_mark_left = drbd_bm_total_weight(mdev); mdev->rs_mark_left[next] = tw;
mdev->rs_last_mark = next;
} }
} }
if (get_ldev(mdev)) { spin_lock_irqsave(&mdev->al_lock, flags);
drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
put_ldev(mdev); spin_unlock_irqrestore(&mdev->al_lock, flags);
}
/* just wake_up unconditional now, various lc_chaged(), /* just wake_up unconditional now, various lc_chaged(),
* lc_put() in drbd_try_clear_on_disk_bm(). */ * lc_put() in drbd_try_clear_on_disk_bm(). */
wake_up = 1; wake_up = 1;
put_ldev(mdev);
} }
spin_unlock_irqrestore(&mdev->al_lock, flags);
if (wake_up) if (wake_up)
wake_up(&mdev->al_wait); wake_up(&mdev->al_wait);
} }
......
...@@ -997,12 +997,16 @@ struct drbd_conf { ...@@ -997,12 +997,16 @@ struct drbd_conf {
unsigned long rs_start; unsigned long rs_start;
/* cumulated time in PausedSyncX state [unit jiffies] */ /* cumulated time in PausedSyncX state [unit jiffies] */
unsigned long rs_paused; unsigned long rs_paused;
/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
unsigned long rs_same_csum;
#define DRBD_SYNC_MARKS 8
#define DRBD_SYNC_MARK_STEP (3*HZ)
/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
unsigned long rs_mark_left; unsigned long rs_mark_left[DRBD_SYNC_MARKS];
/* marks's time [unit jiffies] */ /* marks's time [unit jiffies] */
unsigned long rs_mark_time; unsigned long rs_mark_time[DRBD_SYNC_MARKS];
/* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ /* current index into rs_mark_{left,time} */
unsigned long rs_same_csum; int rs_last_mark;
/* where does the admin want us to start? (sector) */ /* where does the admin want us to start? (sector) */
sector_t ov_start_sector; sector_t ov_start_sector;
...@@ -1077,8 +1081,12 @@ struct drbd_conf { ...@@ -1077,8 +1081,12 @@ struct drbd_conf {
u64 ed_uuid; /* UUID of the exposed data */ u64 ed_uuid; /* UUID of the exposed data */
struct mutex state_mutex; struct mutex state_mutex;
char congestion_reason; /* Why we where congested... */ char congestion_reason; /* Why we where congested... */
atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
int c_sync_rate; /* current resync rate after delay_probe magic */ atomic_t rs_sect_ev; /* for submitted resync data rate, both */
int rs_last_sect_ev; /* counter to compare with */
int rs_last_events; /* counter of read or write "events" (unit sectors)
* on the lower level device when we last looked. */
int c_sync_rate; /* current resync rate after syncer throttle magic */
struct fifo_buffer rs_plan_s; /* correction values of resync planer */ struct fifo_buffer rs_plan_s; /* correction values of resync planer */
int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
int rs_planed; /* resync sectors already planed */ int rs_planed; /* resync sectors already planed */
...@@ -2072,10 +2080,11 @@ static inline int get_net_conf(struct drbd_conf *mdev) ...@@ -2072,10 +2080,11 @@ static inline int get_net_conf(struct drbd_conf *mdev)
static inline void put_ldev(struct drbd_conf *mdev) static inline void put_ldev(struct drbd_conf *mdev)
{ {
int i = atomic_dec_return(&mdev->local_cnt);
__release(local); __release(local);
if (atomic_dec_and_test(&mdev->local_cnt)) D_ASSERT(i >= 0);
if (i == 0)
wake_up(&mdev->misc_wait); wake_up(&mdev->misc_wait);
D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
} }
#ifndef __CHECKER__ #ifndef __CHECKER__
......
...@@ -1064,7 +1064,8 @@ int __drbd_set_state(struct drbd_conf *mdev, ...@@ -1064,7 +1064,8 @@ int __drbd_set_state(struct drbd_conf *mdev,
if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
(ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
dev_info(DEV, "Syncer continues.\n"); dev_info(DEV, "Syncer continues.\n");
mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; mdev->rs_paused += (long)jiffies
-(long)mdev->rs_mark_time[mdev->rs_last_mark];
if (ns.conn == C_SYNC_TARGET) { if (ns.conn == C_SYNC_TARGET) {
if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))
mod_timer(&mdev->resync_timer, jiffies); mod_timer(&mdev->resync_timer, jiffies);
...@@ -1078,27 +1079,33 @@ int __drbd_set_state(struct drbd_conf *mdev, ...@@ -1078,27 +1079,33 @@ int __drbd_set_state(struct drbd_conf *mdev,
if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
(ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
dev_info(DEV, "Resync suspended\n"); dev_info(DEV, "Resync suspended\n");
mdev->rs_mark_time = jiffies; mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
if (ns.conn == C_PAUSED_SYNC_T) if (ns.conn == C_PAUSED_SYNC_T)
set_bit(STOP_SYNC_TIMER, &mdev->flags); set_bit(STOP_SYNC_TIMER, &mdev->flags);
} }
if (os.conn == C_CONNECTED && if (os.conn == C_CONNECTED &&
(ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
unsigned long now = jiffies;
int i;
mdev->ov_position = 0; mdev->ov_position = 0;
mdev->rs_total = mdev->rs_total = drbd_bm_bits(mdev);
mdev->rs_mark_left = drbd_bm_bits(mdev);
if (mdev->agreed_pro_version >= 90) if (mdev->agreed_pro_version >= 90)
set_ov_position(mdev, ns.conn); set_ov_position(mdev, ns.conn);
else else
mdev->ov_start_sector = 0; mdev->ov_start_sector = 0;
mdev->ov_left = mdev->rs_total mdev->ov_left = mdev->rs_total
- BM_SECT_TO_BIT(mdev->ov_position); - BM_SECT_TO_BIT(mdev->ov_position);
mdev->rs_start = mdev->rs_start = now;
mdev->rs_mark_time = jiffies;
mdev->ov_last_oos_size = 0; mdev->ov_last_oos_size = 0;
mdev->ov_last_oos_start = 0; mdev->ov_last_oos_start = 0;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
mdev->rs_mark_left[i] = mdev->rs_total;
mdev->rs_mark_time[i] = now;
}
if (ns.conn == C_VERIFY_S) { if (ns.conn == C_VERIFY_S) {
dev_info(DEV, "Starting Online Verify from sector %llu\n", dev_info(DEV, "Starting Online Verify from sector %llu\n",
(unsigned long long)mdev->ov_position); (unsigned long long)mdev->ov_position);
...@@ -2793,6 +2800,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) ...@@ -2793,6 +2800,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
void drbd_mdev_cleanup(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev)
{ {
int i;
if (mdev->receiver.t_state != None) if (mdev->receiver.t_state != None)
dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
mdev->receiver.t_state); mdev->receiver.t_state);
...@@ -2809,9 +2817,12 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) ...@@ -2809,9 +2817,12 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
mdev->p_size = mdev->p_size =
mdev->rs_start = mdev->rs_start =
mdev->rs_total = mdev->rs_total =
mdev->rs_failed = mdev->rs_failed = 0;
mdev->rs_mark_left = mdev->rs_last_events = 0;
mdev->rs_mark_time = 0; for (i = 0; i < DRBD_SYNC_MARKS; i++) {
mdev->rs_mark_left[i] = 0;
mdev->rs_mark_time[i] = 0;
}
D_ASSERT(mdev->net_conf == NULL); D_ASSERT(mdev->net_conf == NULL);
drbd_set_my_capacity(mdev, 0); drbd_set_my_capacity(mdev, 0);
......
...@@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) ...@@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
unsigned long db, dt, dbdt, rt, rs_left; unsigned long db, dt, dbdt, rt, rs_left;
unsigned int res; unsigned int res;
int i, x, y; int i, x, y;
int stalled = 0;
drbd_get_syncer_progress(mdev, &rs_left, &res); drbd_get_syncer_progress(mdev, &rs_left, &res);
...@@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) ...@@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
* db: blocks written from mark until now * db: blocks written from mark until now
* rt: remaining time * rt: remaining time
*/ */
dt = (jiffies - mdev->rs_mark_time) / HZ; /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
* at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
if (dt > 20) { * least DRBD_SYNC_MARK_STEP time before it will be modified. */
/* if we made no update to rs_mark_time for too long, i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
* we are stalled. show that. */ dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
seq_printf(seq, "stalled\n"); if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
return; stalled = 1;
}
if (!dt) if (!dt)
dt++; dt++;
db = mdev->rs_mark_left - rs_left; db = mdev->rs_mark_left[i] - rs_left;
rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
seq_printf(seq, "finish: %lu:%02lu:%02lu", seq_printf(seq, "finish: %lu:%02lu:%02lu",
...@@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) ...@@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
else else
seq_printf(seq, " (%ld)", dbdt); seq_printf(seq, " (%ld)", dbdt);
seq_printf(seq, " K/sec\n"); if (mdev->state.conn == C_SYNC_TARGET) {
if (mdev->c_sync_rate > 1000)
seq_printf(seq, " want: %d,%03d",
mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
else
seq_printf(seq, " want: %d", mdev->c_sync_rate);
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
} }
static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
......
...@@ -1481,13 +1481,19 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ...@@ -1481,13 +1481,19 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
r = SS_UNKNOWN_ERROR; r = SS_UNKNOWN_ERROR;
if (r == SS_SUCCESS) { if (r == SS_SUCCESS) {
mdev->rs_total = unsigned long tw = drbd_bm_total_weight(mdev);
mdev->rs_mark_left = drbd_bm_total_weight(mdev); unsigned long now = jiffies;
int i;
mdev->rs_failed = 0; mdev->rs_failed = 0;
mdev->rs_paused = 0; mdev->rs_paused = 0;
mdev->rs_start =
mdev->rs_mark_time = jiffies;
mdev->rs_same_csum = 0; mdev->rs_same_csum = 0;
mdev->rs_total = tw;
mdev->rs_start = now;
for (i = 0; i < DRBD_SYNC_MARKS; i++) {
mdev->rs_mark_left[i] = tw;
mdev->rs_mark_time[i] = now;
}
_drbd_pause_after(mdev); _drbd_pause_after(mdev);
} }
write_unlock_irq(&global_state_lock); write_unlock_irq(&global_state_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment