Commit a220d291 authored by Lars Ellenberg's avatar Lars Ellenberg Committed by Philipp Reisner

drbd: allow bitmap to change during writeout from resync_finished

Symptom: messages similar to
 "FIXME asender in bm_change_bits_to,
  bitmap locked for 'write from resync_finished' by worker"

If a resync or verify is finished (or aborted), a full bitmap writeout
is triggered.  If we have ongoing local IO, the bitmap may still change
during that writeout, pending and not yet processed acks may cause bits
to be cleared, while new writes may cause bits to be to be set.

To fix this, introduce the drbd_bm_write_copy_pages() variant.
Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
parent 5016b82a
...@@ -1190,6 +1190,22 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l ...@@ -1190,6 +1190,22 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx);
} }
/**
* drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
* @mdev: DRBD device.
*
* Will only write pages that have changed since last IO.
* In contrast to drbd_bm_write(), this will copy the bitmap pages
* to temporary writeout pages. It is intended to trigger a full write-out
* while still allowing the bitmap to change, for example if a resync or online
* verify is aborted due to a failed peer disk, while local IO continues, or
* pending resync acks are still being processed.
*/
int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
}
/** /**
* drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
* @mdev: DRBD device. * @mdev: DRBD device.
......
...@@ -709,22 +709,28 @@ enum bm_flag { ...@@ -709,22 +709,28 @@ enum bm_flag {
BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
/* currently locked for bulk operation */ /* currently locked for bulk operation */
BM_LOCKED_MASK = 0x7, BM_LOCKED_MASK = 0xf,
/* in detail, that is: */ /* in detail, that is: */
BM_DONT_CLEAR = 0x1, BM_DONT_CLEAR = 0x1,
BM_DONT_SET = 0x2, BM_DONT_SET = 0x2,
BM_DONT_TEST = 0x4, BM_DONT_TEST = 0x4,
/* so we can mark it locked for bulk operation,
* and still allow all non-bulk operations */
BM_IS_LOCKED = 0x8,
/* (test bit, count bit) allowed (common case) */ /* (test bit, count bit) allowed (common case) */
BM_LOCKED_TEST_ALLOWED = 0x3, BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,
/* testing bits, as well as setting new bits allowed, but clearing bits /* testing bits, as well as setting new bits allowed, but clearing bits
* would be unexpected. Used during bitmap receive. Setting new bits * would be unexpected. Used during bitmap receive. Setting new bits
* requires sending of "out-of-sync" information, though. */ * requires sending of "out-of-sync" information, though. */
BM_LOCKED_SET_ALLOWED = 0x1, BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED,
/* clear is not expected while bitmap is locked for bulk operation */ /* for drbd_bm_write_copy_pages, everything is allowed,
* only concurrent bulk operations are locked out. */
BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,
}; };
struct drbd_work_queue { struct drbd_work_queue {
...@@ -1306,6 +1312,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); ...@@ -1306,6 +1312,7 @@ extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr);
extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
extern size_t drbd_bm_words(struct drbd_conf *mdev); extern size_t drbd_bm_words(struct drbd_conf *mdev);
extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev);
......
...@@ -1421,8 +1421,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ...@@ -1421,8 +1421,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
* No harm done if some bits change during this phase. * No harm done if some bits change during this phase.
*/ */
if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
"write from resync_finished", BM_LOCKED_SET_ALLOWED); "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
put_ldev(mdev); put_ldev(mdev);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment