Commit e3555d85 authored by Philipp Reisner's avatar Philipp Reisner

drbd: Implemented priority inheritance for resync requests

We only issue resync requests if there is no significant application IO
going on, i.e. application IO has higher priority than resync IO.

If application IO cannot be started because the resync process locked
a resync_lru entry, start the IO operations necessary to release the
lock ASAP.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 59817f4f
...@@ -182,6 +182,7 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) ...@@ -182,6 +182,7 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
if (unlikely(tmp != NULL)) { if (unlikely(tmp != NULL)) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
set_bit(BME_PRIORITY, &bm_ext->flags);
spin_unlock_irq(&mdev->al_lock); spin_unlock_irq(&mdev->al_lock);
return NULL; return NULL;
} }
...@@ -1297,8 +1298,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector) ...@@ -1297,8 +1298,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
} }
if (lc_put(mdev->resync, &bm_ext->lce) == 0) { if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
clear_bit(BME_LOCKED, &bm_ext->flags); bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
clear_bit(BME_NO_WRITES, &bm_ext->flags);
mdev->resync_locked--; mdev->resync_locked--;
wake_up(&mdev->al_wait); wake_up(&mdev->al_wait);
} }
......
...@@ -1326,6 +1326,7 @@ struct bm_extent { ...@@ -1326,6 +1326,7 @@ struct bm_extent {
#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ #define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ #define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */
#define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */
/* drbd_bitmap.c */ /* drbd_bitmap.c */
/* /*
...@@ -1552,7 +1553,7 @@ extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int); ...@@ -1552,7 +1553,7 @@ extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data); extern void resync_timer_fn(unsigned long data);
/* drbd_receiver.c */ /* drbd_receiver.c */
extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
const unsigned rw, const int fault_type); const unsigned rw, const int fault_type);
extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
......
...@@ -1862,10 +1862,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ...@@ -1862,10 +1862,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
* The current sync rate used here uses only the most recent two step marks, * The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster. * to have a short time average so we can react faster.
*/ */
int drbd_rs_should_slow_down(struct drbd_conf *mdev) int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{ {
struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
unsigned long db, dt, dbdt; unsigned long db, dt, dbdt;
struct lc_element *tmp;
int curr_events; int curr_events;
int throttle = 0; int throttle = 0;
...@@ -1873,9 +1874,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) ...@@ -1873,9 +1874,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
if (mdev->sync_conf.c_min_rate == 0) if (mdev->sync_conf.c_min_rate == 0)
return 0; return 0;
spin_lock_irq(&mdev->al_lock);
tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
if (tmp) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
spin_unlock_irq(&mdev->al_lock);
return 0;
}
/* Do not slow down if app IO is already waiting for this extent */
}
spin_unlock_irq(&mdev->al_lock);
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
(int)part_stat_read(&disk->part0, sectors[1]) - (int)part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&mdev->rs_sect_ev); atomic_read(&mdev->rs_sect_ev);
if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
unsigned long rs_left; unsigned long rs_left;
int i; int i;
...@@ -2060,9 +2074,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un ...@@ -2060,9 +2074,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
* we would also throttle its application reads. * we would also throttle its application reads.
* In that case, throttling is done on the SyncTarget only. * In that case, throttling is done on the SyncTarget only.
*/ */
if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
msleep(100); schedule_timeout_uninterruptible(HZ/10);
if (drbd_rs_begin_io(mdev, e->sector)) if (drbd_rs_begin_io(mdev, sector))
goto out_free_e; goto out_free_e;
submit_for_resync: submit_for_resync:
......
...@@ -355,7 +355,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) ...@@ -355,7 +355,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
if (!get_ldev(mdev)) if (!get_ldev(mdev))
return -EIO; return -EIO;
if (drbd_rs_should_slow_down(mdev)) if (drbd_rs_should_slow_down(mdev, sector))
goto defer; goto defer;
/* GFP_TRY, because if there is no memory available right now, this may /* GFP_TRY, because if there is no memory available right now, this may
...@@ -503,16 +503,6 @@ int drbd_rs_number_requests(struct drbd_conf *mdev) ...@@ -503,16 +503,6 @@ int drbd_rs_number_requests(struct drbd_conf *mdev)
number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
} }
/* Throttle resync on lower level disk activity, which may also be
* caused by application IO on Primary/SyncTarget.
* Keep this after the call to drbd_rs_controller, as that assumes
* to be called as precisely as possible every SLEEP_TIME,
* and would be confused otherwise. */
if (number && drbd_rs_should_slow_down(mdev)) {
mdev->c_sync_rate = 1;
number = 0;
}
/* ignore the amount of pending requests, the resync controller should /* ignore the amount of pending requests, the resync controller should
* throttle down to incoming reply rate soon enough anyways. */ * throttle down to incoming reply rate soon enough anyways. */
return number; return number;
...@@ -594,7 +584,8 @@ int w_make_resync_request(struct drbd_conf *mdev, ...@@ -594,7 +584,8 @@ int w_make_resync_request(struct drbd_conf *mdev,
sector = BM_BIT_TO_SECT(bit); sector = BM_BIT_TO_SECT(bit);
if (drbd_try_rs_begin_io(mdev, sector)) { if (drbd_rs_should_slow_down(mdev, sector) ||
drbd_try_rs_begin_io(mdev, sector)) {
mdev->bm_resync_fo = bit; mdev->bm_resync_fo = bit;
goto requeue; goto requeue;
} }
...@@ -719,7 +710,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca ...@@ -719,7 +710,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
size = BM_BLOCK_SIZE; size = BM_BLOCK_SIZE;
if (drbd_try_rs_begin_io(mdev, sector)) { if (drbd_rs_should_slow_down(mdev, sector) ||
drbd_try_rs_begin_io(mdev, sector)) {
mdev->ov_position = sector; mdev->ov_position = sector;
goto requeue; goto requeue;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment