Commit dbb64f86 authored by Goldwyn Rodrigues's avatar Goldwyn Rodrigues

md-cluster: Fix adding of new disk with new reload code

Adding the disk worked incorrectly with the new reload code. Fix it:

 - No operation should be performed on rdev marked as Candidate
 - After a metadata update operation, kick disk if role is 0xfffe
   else clear Candidate bit and continue with the regular change check.
 - Saving the mode of the lock resource to check if token lock is already
   locked, because it can be called twice while adding a disk. However,
   unlock_comm() must be called only once.
 - add_new_disk() is called by the node initiating the --add operation.
   If it needs to be canceled, call add_new_disk_cancel(). The operation
   is completed by md_update_sb() which will write and unlock the
   communication.
Signed-off-by: default avatarGoldwyn Rodrigues <rgoldwyn@suse.com>
parent c186b128
...@@ -28,6 +28,7 @@ struct dlm_lock_resource { ...@@ -28,6 +28,7 @@ struct dlm_lock_resource {
struct completion completion; /* completion for synchronized locking */ struct completion completion; /* completion for synchronized locking */
void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
struct mddev *mddev; /* pointing back to mddev. */ struct mddev *mddev; /* pointing back to mddev. */
int mode;
}; };
struct suspend_info { struct suspend_info {
...@@ -107,6 +108,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) ...@@ -107,6 +108,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
if (ret) if (ret)
return ret; return ret;
wait_for_completion(&res->completion); wait_for_completion(&res->completion);
if (res->lksb.sb_status == 0)
res->mode = mode;
return res->lksb.sb_status; return res->lksb.sb_status;
} }
...@@ -128,6 +131,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev, ...@@ -128,6 +131,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
init_completion(&res->completion); init_completion(&res->completion);
res->ls = cinfo->lockspace; res->ls = cinfo->lockspace;
res->mddev = mddev; res->mddev = mddev;
res->mode = DLM_LOCK_IV;
namelen = strlen(name); namelen = strlen(name);
res->name = kzalloc(namelen + 1, GFP_KERNEL); res->name = kzalloc(namelen + 1, GFP_KERNEL);
if (!res->name) { if (!res->name) {
...@@ -536,11 +540,17 @@ static void recv_daemon(struct md_thread *thread) ...@@ -536,11 +540,17 @@ static void recv_daemon(struct md_thread *thread)
/* lock_comm() /* lock_comm()
* Takes the lock on the TOKEN lock resource so no other * Takes the lock on the TOKEN lock resource so no other
* node can communicate while the operation is underway. * node can communicate while the operation is underway.
* If called again, and the TOKEN lock is alread in EX mode
* return success. However, care must be taken that unlock_comm()
* is called only once.
*/ */
static int lock_comm(struct md_cluster_info *cinfo) static int lock_comm(struct md_cluster_info *cinfo)
{ {
int error; int error;
if (cinfo->token_lockres->mode == DLM_LOCK_EX)
return 0;
error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
if (error) if (error)
pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
...@@ -550,6 +560,7 @@ static int lock_comm(struct md_cluster_info *cinfo) ...@@ -550,6 +560,7 @@ static int lock_comm(struct md_cluster_info *cinfo)
static void unlock_comm(struct md_cluster_info *cinfo) static void unlock_comm(struct md_cluster_info *cinfo)
{ {
WARN_ON(cinfo->token_lockres->mode != DLM_LOCK_EX);
dlm_unlock_sync(cinfo->token_lockres); dlm_unlock_sync(cinfo->token_lockres);
} }
...@@ -862,11 +873,10 @@ static int metadata_update_finish(struct mddev *mddev) ...@@ -862,11 +873,10 @@ static int metadata_update_finish(struct mddev *mddev)
return ret; return ret;
} }
static int metadata_update_cancel(struct mddev *mddev) static void metadata_update_cancel(struct mddev *mddev)
{ {
struct md_cluster_info *cinfo = mddev->cluster_info; struct md_cluster_info *cinfo = mddev->cluster_info;
unlock_comm(cinfo);
return dlm_unlock_sync(cinfo->token_lockres);
} }
static int resync_start(struct mddev *mddev) static int resync_start(struct mddev *mddev)
...@@ -925,7 +935,11 @@ static int area_resyncing(struct mddev *mddev, int direction, ...@@ -925,7 +935,11 @@ static int area_resyncing(struct mddev *mddev, int direction,
return ret; return ret;
} }
static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) /* add_new_disk() - initiates a disk add
* However, if this fails before writing md_update_sb(),
* add_new_disk_cancel() must be called to release token lock
*/
static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
{ {
struct md_cluster_info *cinfo = mddev->cluster_info; struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg; struct cluster_msg cmsg;
...@@ -947,16 +961,17 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) ...@@ -947,16 +961,17 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
/* Some node does not "see" the device */ /* Some node does not "see" the device */
if (ret == -EAGAIN) if (ret == -EAGAIN)
ret = -ENOENT; ret = -ENOENT;
if (ret)
unlock_comm(cinfo);
else else
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
return ret; return ret;
} }
static int add_new_disk_finish(struct mddev *mddev) static void add_new_disk_cancel(struct mddev *mddev)
{ {
/* Write sb and inform others */ struct md_cluster_info *cinfo = mddev->cluster_info;
md_update_sb(mddev, 1); unlock_comm(cinfo);
return metadata_update_finish(mddev);
} }
static int new_disk_ack(struct mddev *mddev, bool ack) static int new_disk_ack(struct mddev *mddev, bool ack)
...@@ -1023,8 +1038,8 @@ static struct md_cluster_operations cluster_ops = { ...@@ -1023,8 +1038,8 @@ static struct md_cluster_operations cluster_ops = {
.metadata_update_finish = metadata_update_finish, .metadata_update_finish = metadata_update_finish,
.metadata_update_cancel = metadata_update_cancel, .metadata_update_cancel = metadata_update_cancel,
.area_resyncing = area_resyncing, .area_resyncing = area_resyncing,
.add_new_disk_start = add_new_disk_start, .add_new_disk = add_new_disk,
.add_new_disk_finish = add_new_disk_finish, .add_new_disk_cancel = add_new_disk_cancel,
.new_disk_ack = new_disk_ack, .new_disk_ack = new_disk_ack,
.remove_disk = remove_disk, .remove_disk = remove_disk,
.gather_bitmaps = gather_bitmaps, .gather_bitmaps = gather_bitmaps,
......
...@@ -15,12 +15,12 @@ struct md_cluster_operations { ...@@ -15,12 +15,12 @@ struct md_cluster_operations {
int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
int (*metadata_update_start)(struct mddev *mddev); int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev); int (*metadata_update_finish)(struct mddev *mddev);
int (*metadata_update_cancel)(struct mddev *mddev); void (*metadata_update_cancel)(struct mddev *mddev);
int (*resync_start)(struct mddev *mddev); int (*resync_start)(struct mddev *mddev);
int (*resync_finish)(struct mddev *mddev); int (*resync_finish)(struct mddev *mddev);
int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); int (*add_new_disk)(struct mddev *mddev, struct md_rdev *rdev);
int (*add_new_disk_finish)(struct mddev *mddev); void (*add_new_disk_cancel)(struct mddev *mddev);
int (*new_disk_ack)(struct mddev *mddev, bool ack); int (*new_disk_ack)(struct mddev *mddev, bool ack);
int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
int (*gather_bitmaps)(struct md_rdev *rdev); int (*gather_bitmaps)(struct md_rdev *rdev);
......
...@@ -3246,14 +3246,6 @@ static void analyze_sbs(struct mddev *mddev) ...@@ -3246,14 +3246,6 @@ static void analyze_sbs(struct mddev *mddev)
md_kick_rdev_from_array(rdev); md_kick_rdev_from_array(rdev);
continue; continue;
} }
/* No device should have a Candidate flag
* when reading devices
*/
if (test_bit(Candidate, &rdev->flags)) {
pr_info("md: kicking Cluster Candidate %s from array!\n",
bdevname(rdev->bdev, b));
md_kick_rdev_from_array(rdev);
}
} }
if (mddev->level == LEVEL_MULTIPATH) { if (mddev->level == LEVEL_MULTIPATH) {
rdev->desc_nr = i++; rdev->desc_nr = i++;
...@@ -5950,19 +5942,12 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) ...@@ -5950,19 +5942,12 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
* check whether the device shows up in other nodes * check whether the device shows up in other nodes
*/ */
if (mddev_is_clustered(mddev)) { if (mddev_is_clustered(mddev)) {
if (info->state & (1 << MD_DISK_CANDIDATE)) { if (info->state & (1 << MD_DISK_CANDIDATE))
/* Through --cluster-confirm */
set_bit(Candidate, &rdev->flags); set_bit(Candidate, &rdev->flags);
err = md_cluster_ops->new_disk_ack(mddev, true); else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
if (err) {
export_rdev(rdev);
return err;
}
} else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
/* --add initiated by this node */ /* --add initiated by this node */
err = md_cluster_ops->add_new_disk_start(mddev, rdev); err = md_cluster_ops->add_new_disk(mddev, rdev);
if (err) { if (err) {
md_cluster_ops->add_new_disk_finish(mddev);
export_rdev(rdev); export_rdev(rdev);
return err; return err;
} }
...@@ -5971,13 +5956,23 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) ...@@ -5971,13 +5956,23 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
rdev->raid_disk = -1; rdev->raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev); err = bind_rdev_to_array(rdev, mddev);
if (err) if (err)
export_rdev(rdev); export_rdev(rdev);
if (mddev_is_clustered(mddev)) {
if (info->state & (1 << MD_DISK_CANDIDATE))
md_cluster_ops->new_disk_ack(mddev, (err == 0));
else {
if (err)
md_cluster_ops->add_new_disk_cancel(mddev);
else else
err = add_bound_rdev(rdev); err = add_bound_rdev(rdev);
if (mddev_is_clustered(mddev) && }
(info->state & (1 << MD_DISK_CLUSTER_ADD)))
md_cluster_ops->add_new_disk_finish(mddev); } else if (!err)
err = add_bound_rdev(rdev);
return err; return err;
} }
...@@ -8055,6 +8050,8 @@ static int remove_and_add_spares(struct mddev *mddev, ...@@ -8055,6 +8050,8 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
if (this && this != rdev) if (this && this != rdev)
continue; continue;
if (test_bit(Candidate, &rdev->flags))
continue;
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) !test_bit(Faulty, &rdev->flags))
...@@ -8972,6 +8969,17 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) ...@@ -8972,6 +8969,17 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
/* Check if the roles changed */ /* Check if the roles changed */
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
if (test_bit(Candidate, &rdev2->flags)) {
if (role == 0xfffe) {
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
md_kick_rdev_from_array(rdev2);
continue;
}
else
clear_bit(Candidate, &rdev2->flags);
}
if (role != rdev2->raid_disk) { if (role != rdev2->raid_disk) {
/* got activated */ /* got activated */
if (rdev2->raid_disk == -1 && role != 0xffff) { if (rdev2->raid_disk == -1 && role != 0xffff) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment