Commit c186b128 authored by Goldwyn Rodrigues

md-cluster: Perform resync/recovery under a DLM lock

Resync or recovery must be performed by only one node at a time.
A DLM lock resource, resync_lockres, provides the mutual exclusion
that enforces this: a node may run recovery/resync only while holding
the lock in exclusive mode.
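
For illustration, a pseudocode-level sketch of the locking protocol, built
only from the operations this commit adds (resync_start()/resync_finish()
in md-cluster.c below); dlm_lock_sync()/dlm_unlock_sync() are md-cluster's
existing synchronous DLM wrappers:

    /* Sketch only: resync_lockres is taken in EX mode as a trylock.
     * DLM_LKF_NOQUEUE makes dlm_lock_sync() fail with -EAGAIN instead of
     * blocking when another node already holds the lock, so at most one
     * node in the cluster runs resync/recovery at any given time. */
    err = md_cluster_ops->resync_start(mddev);  /* EX lock, DLM_LKF_NOQUEUE */
    if (err)
            return;                 /* another node owns the resync */

    /* ... perform the resync/recovery (md_do_sync()) ... */

    md_cluster_ops->resync_finish(mddev);       /* unlock + send lo = hi = 0 */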

If a node is unable to get resync_lockres because recovery is already
being performed by another node, it sets MD_RECOVERY_NEEDED so that
recovery is scheduled again in the future.
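
For reference, a sketch of that rescheduling path, taken from the
md_start_sync() and process_suspend_info() hunks below rather than invented:

    /* Sketch only: in md_start_sync(), a failed trylock is not an error;
     * the node simply skips starting a sync thread for now. */
    if (mddev_is_clustered(mddev)) {
            ret = md_cluster_ops->resync_start(mddev);
            if (ret)          /* typically -EAGAIN: another node is resyncing */
                    goto out; /* no sync thread, no error message */
    }

    /* Sketch only: when the winning node finishes, resync_finish() sends a
     * RESYNCING message with lo == hi == 0; process_suspend_info() on the
     * other nodes handles it and re-arms recovery: */
    set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
    md_wakeup_thread(mddev->thread);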

Also remove the debug message in resync_info_update() that was only
used during development.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
parent 2aa82191
@@ -55,6 +55,7 @@ struct md_cluster_info {
 	struct completion completion;
 	struct mutex sb_mutex;
 	struct dlm_lock_resource *bitmap_lockres;
+	struct dlm_lock_resource *resync_lockres;
 	struct list_head suspend_list;
 	spinlock_t suspend_lock;
 	struct md_thread *recovery_thread;
@@ -384,6 +385,8 @@ static void process_suspend_info(struct mddev *mddev,
 	if (!hi) {
 		remove_suspend_info(mddev, slot);
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
 		return;
 	}
 	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
@@ -758,6 +761,10 @@ static int join(struct mddev *mddev, int nodes)
 		goto err;
 	}
+	cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
+	if (!cinfo->resync_lockres)
+		goto err;
+
 	ret = gather_all_resync_info(mddev, nodes);
 	if (ret)
 		goto err;
@@ -768,6 +775,7 @@ static int join(struct mddev *mddev, int nodes)
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
 	lockres_free(cinfo->no_new_dev_lockres);
+	lockres_free(cinfo->resync_lockres);
 	lockres_free(cinfo->bitmap_lockres);
 	if (cinfo->lockspace)
 		dlm_release_lockspace(cinfo->lockspace, 2);
@@ -861,6 +869,13 @@ static int metadata_update_cancel(struct mddev *mddev)
 	return dlm_unlock_sync(cinfo->token_lockres);
 }
 
+static int resync_start(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
+	return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
+}
+
 static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -870,16 +885,22 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 	add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
 	/* Re-acquire the lock to refresh LVB */
 	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
-	pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
-		(unsigned long long)lo,
-		(unsigned long long)hi);
 	cmsg.type = cpu_to_le32(RESYNCING);
 	cmsg.slot = cpu_to_le32(slot);
 	cmsg.low = cpu_to_le64(lo);
 	cmsg.high = cpu_to_le64(hi);
 	return sendmsg(cinfo, &cmsg);
 }
 
+static int resync_finish(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
+	dlm_unlock_sync(cinfo->resync_lockres);
+	return resync_info_update(mddev, 0, 0);
+}
+
 static int area_resyncing(struct mddev *mddev, int direction,
 		sector_t lo, sector_t hi)
 {
@@ -995,6 +1016,8 @@ static struct md_cluster_operations cluster_ops = {
 	.join = join,
 	.leave = leave,
 	.slot_number = slot_number,
+	.resync_start = resync_start,
+	.resync_finish = resync_finish,
 	.resync_info_update = resync_info_update,
 	.metadata_update_start = metadata_update_start,
 	.metadata_update_finish = metadata_update_finish,
......
@@ -16,6 +16,8 @@ struct md_cluster_operations {
 	int (*metadata_update_start)(struct mddev *mddev);
 	int (*metadata_update_finish)(struct mddev *mddev);
 	int (*metadata_update_cancel)(struct mddev *mddev);
+	int (*resync_start)(struct mddev *mddev);
+	int (*resync_finish)(struct mddev *mddev);
 	int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
 	int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
 	int (*add_new_disk_finish)(struct mddev *mddev);
......
@@ -7657,6 +7657,7 @@ void md_do_sync(struct md_thread *thread)
 	struct md_rdev *rdev;
 	char *desc, *action = NULL;
 	struct blk_plug plug;
+	bool cluster_resync_finished = false;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7959,7 +7960,11 @@ void md_do_sync(struct md_thread *thread)
 			mddev->curr_resync_completed = mddev->curr_resync;
 			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	}
-	/* tell personality that we are finished */
+	/* tell personality and other nodes that we are finished */
+	if (mddev_is_clustered(mddev)) {
+		md_cluster_ops->resync_finish(mddev);
+		cluster_resync_finished = true;
+	}
 	mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
@@ -7997,6 +8002,11 @@ void md_do_sync(struct md_thread *thread)
  skip:
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
+	if (mddev_is_clustered(mddev) &&
+	    test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+	    !cluster_resync_finished)
+		md_cluster_ops->resync_finish(mddev);
+
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 		/* We completed so min/max setting can be forgotten if used. */
@@ -8078,14 +8088,25 @@ static int remove_and_add_spares(struct mddev *mddev,
 static void md_start_sync(struct work_struct *ws)
 {
 	struct mddev *mddev = container_of(ws, struct mddev, del_work);
+	int ret = 0;
 
+	if (mddev_is_clustered(mddev)) {
+		ret = md_cluster_ops->resync_start(mddev);
+		if (ret) {
+			mddev->sync_thread = NULL;
+			goto out;
+		}
+	}
+
 	mddev->sync_thread = md_register_thread(md_do_sync,
 						mddev,
 						"resync");
+out:
 	if (!mddev->sync_thread) {
-		printk(KERN_ERR "%s: could not start resync"
-		       " thread...\n",
-		       mdname(mddev));
+		if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
+			printk(KERN_ERR "%s: could not start resync"
+			       " thread...\n",
+			       mdname(mddev));
 		/* leave the spares where they are, it shouldn't hurt */
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
......
@@ -2503,8 +2503,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		if (mddev_is_clustered(mddev)) {
 			conf->cluster_sync_low = 0;
 			conf->cluster_sync_high = 0;
-			/* Send zeros to mark end of resync */
-			md_cluster_ops->resync_info_update(mddev, 0, 0);
 		}
 		return 0;
 	}
......