Commit 1cd04dbe authored by Kurt Hackel, committed by Mark Fasheh

ocfs2_dlm: Flush dlm workqueue before starting to migrate

This prevents the condition in which a previously queued-up
assert_master message is asserted after we start the migration.
Migration now ensures the workqueue is flushed before proceeding
to migrate the lock to another node. This condition is
typically encountered during parallel umounts.
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent e17e75ec
...@@ -1507,10 +1507,11 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data) ...@@ -1507,10 +1507,11 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
/* take care of the easy cases up front */ /* take care of the easy cases up front */
spin_lock(&res->spinlock); spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) { if (res->state & (DLM_LOCK_RES_RECOVERING|
DLM_LOCK_RES_MIGRATING)) {
spin_unlock(&res->spinlock); spin_unlock(&res->spinlock);
mlog(0, "returning DLM_MASTER_RESP_ERROR since res is " mlog(0, "returning DLM_MASTER_RESP_ERROR since res is "
"being recovered\n"); "being recovered/migrated\n");
response = DLM_MASTER_RESP_ERROR; response = DLM_MASTER_RESP_ERROR;
if (mle) if (mle)
kmem_cache_free(dlm_mle_cache, mle); kmem_cache_free(dlm_mle_cache, mle);
...@@ -2493,6 +2494,9 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ...@@ -2493,6 +2494,9 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
* the lockres * the lockres
*/ */
/* now that remote nodes are spinning on the MIGRATING flag,
* ensure that all assert_master work is flushed. */
flush_workqueue(dlm->dlm_worker);
/* get an extra reference on the mle. /* get an extra reference on the mle.
* otherwise the assert_master from the new * otherwise the assert_master from the new
...@@ -2547,7 +2551,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ...@@ -2547,7 +2551,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
res->owner == target) res->owner == target)
break; break;
mlog(0, "timed out during migration\n"); mlog(0, "%s:%.*s: timed out during migration\n",
dlm->name, res->lockname.len, res->lockname.name);
/* avoid hang during shutdown when migrating lockres /* avoid hang during shutdown when migrating lockres
* to a node which also goes down */ * to a node which also goes down */
if (dlm_is_node_dead(dlm, target)) { if (dlm_is_node_dead(dlm, target)) {
...@@ -2555,10 +2560,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ...@@ -2555,10 +2560,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
"target %u is no longer up, restarting\n", "target %u is no longer up, restarting\n",
dlm->name, res->lockname.len, dlm->name, res->lockname.len,
res->lockname.name, target); res->lockname.name, target);
ret = -ERESTARTSYS; ret = -EINVAL;
}
}
if (ret == -ERESTARTSYS) {
/* migration failed, detach and clean up mle */ /* migration failed, detach and clean up mle */
dlm_mle_detach_hb_events(dlm, mle); dlm_mle_detach_hb_events(dlm, mle);
dlm_put_mle(mle); dlm_put_mle(mle);
...@@ -2568,7 +2570,9 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ...@@ -2568,7 +2570,9 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
spin_unlock(&res->spinlock); spin_unlock(&res->spinlock);
goto leave; goto leave;
} }
/* TODO: if node died: stop, clean up, return error */ } else
mlog(0, "%s:%.*s: caught signal during migration\n",
dlm->name, res->lockname.len, res->lockname.name);
} }
/* all done, set the owner, clear the flag */ /* all done, set the owner, clear the flag */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment