Commit e2faea4c authored by Kurt Hackel's avatar Kurt Hackel Committed by Mark Fasheh

[PATCH] ocfs2/dlm: fixes

* fix a hang which can occur during shutdown migration
* do not allow nodes to join during recovery
* when restarting lock mastery, do not ignore nodes which come up
* more than one node could become recovery master, fix this
* sleep to allow some time for heartbeat state to catch up to network
* extra debug info for bad recovery state problems
* make DLM_RECO_NODE_DATA_DONE a valid state for non-master recovery nodes
* prune all locks from dead nodes on $RECOVERY lock resources
* do NOT automatically add new nodes to mle nodemaps until they have properly
  joined the domain
* make sure dlm_pick_recovery_master only exits when all nodes have synced
* properly handle dlmunlock errors in dlm_pick_recovery_master
* do not propagate network errors in dlm_send_begin_reco_message
* dead nodes were not being put in the recovery map sometimes, fix this
* dlmunlock was failing to clear the unlock actions on DLM_DENIED
Signed-off-by: default avatarKurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: default avatarMark Fasheh <mark.fasheh@oracle.com>
parent 0d419a6a
...@@ -657,6 +657,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm); ...@@ -657,6 +657,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm);
int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
void dlm_wait_for_recovery(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
void dlm_put(struct dlm_ctxt *dlm); void dlm_put(struct dlm_ctxt *dlm);
struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
......
...@@ -573,8 +573,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) ...@@ -573,8 +573,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
spin_lock(&dlm_domain_lock); spin_lock(&dlm_domain_lock);
dlm = __dlm_lookup_domain_full(query->domain, query->name_len); dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
/* Once the dlm ctxt is marked as leaving then we don't want /* Once the dlm ctxt is marked as leaving then we don't want
* to be put in someone's domain map. */ * to be put in someone's domain map.
* Also, explicitly disallow joining at certain troublesome
* times (ie. during recovery). */
if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
int bit = query->node_idx;
spin_lock(&dlm->spinlock); spin_lock(&dlm->spinlock);
if (dlm->dlm_state == DLM_CTXT_NEW && if (dlm->dlm_state == DLM_CTXT_NEW &&
...@@ -586,6 +589,19 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data) ...@@ -586,6 +589,19 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
} else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
/* Disallow parallel joins. */ /* Disallow parallel joins. */
response = JOIN_DISALLOW; response = JOIN_DISALLOW;
} else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
mlog(ML_NOTICE, "node %u trying to join, but recovery "
"is ongoing.\n", bit);
response = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->recovery_map)) {
mlog(ML_NOTICE, "node %u trying to join, but it "
"still needs recovery.\n", bit);
response = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->domain_map)) {
mlog(ML_NOTICE, "node %u trying to join, but it "
"is still in the domain! needs recovery?\n",
bit);
response = JOIN_DISALLOW;
} else { } else {
/* Alright we're fully a part of this domain /* Alright we're fully a part of this domain
* so we keep some state as to who's joining * so we keep some state as to who's joining
......
...@@ -1050,17 +1050,10 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, ...@@ -1050,17 +1050,10 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
node = dlm_bitmap_diff_iter_next(&bdi, &sc); node = dlm_bitmap_diff_iter_next(&bdi, &sc);
while (node >= 0) { while (node >= 0) {
if (sc == NODE_UP) { if (sc == NODE_UP) {
/* a node came up. easy. might not even need /* a node came up. clear any old vote from
* to talk to it if its node number is higher * the response map and set it in the vote map
* or if we are already blocked. */ * then restart the mastery. */
mlog(0, "node up! %d\n", node); mlog(ML_NOTICE, "node %d up while restarting\n", node);
if (blocked)
goto next;
if (node > dlm->node_num) {
mlog(0, "node > this node. skipping.\n");
goto next;
}
/* redo the master request, but only for the new node */ /* redo the master request, but only for the new node */
mlog(0, "sending request to new node\n"); mlog(0, "sending request to new node\n");
...@@ -2005,6 +1998,15 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, ...@@ -2005,6 +1998,15 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
break; break;
mlog(0, "timed out during migration\n"); mlog(0, "timed out during migration\n");
/* avoid hang during shutdown when migrating lockres
* to a node which also goes down */
if (dlm_is_node_dead(dlm, target)) {
mlog(0, "%s:%.*s: expected migration target %u "
"is no longer up. restarting.\n",
dlm->name, res->lockname.len,
res->lockname.name, target);
ret = -ERESTARTSYS;
}
} }
if (ret == -ERESTARTSYS) { if (ret == -ERESTARTSYS) {
/* migration failed, detach and clean up mle */ /* migration failed, detach and clean up mle */
......
This diff is collapsed.
...@@ -188,6 +188,19 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, ...@@ -188,6 +188,19 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
actions &= ~(DLM_UNLOCK_REMOVE_LOCK| actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
DLM_UNLOCK_REGRANT_LOCK| DLM_UNLOCK_REGRANT_LOCK|
DLM_UNLOCK_CLEAR_CONVERT_TYPE); DLM_UNLOCK_CLEAR_CONVERT_TYPE);
} else if (status == DLM_RECOVERING ||
status == DLM_MIGRATING ||
status == DLM_FORWARD) {
/* must clear the actions because this unlock
* is about to be retried. cannot free or do
* any list manipulation. */
mlog(0, "%s:%.*s: clearing actions, %s\n",
dlm->name, res->lockname.len,
res->lockname.name,
status==DLM_RECOVERING?"recovering":
(status==DLM_MIGRATING?"migrating":
"forward"));
actions = 0;
} }
if (flags & LKM_CANCEL) if (flags & LKM_CANCEL)
lock->cancel_pending = 0; lock->cancel_pending = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment