summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
author: Sunil Mushran <sunil.mushran@oracle.com> 2007-01-29 15:44:27 -0800
committer: Mark Fasheh <mark.fasheh@oracle.com> 2007-02-07 12:10:39 -0800
commit: 0dd82141b236ce36253e3056c6068ee3d5732196 (patch)
tree: 51c4c4746ffa390d4dba6a342aeaa526a35cb4eb /fs
parent: e4968476a9bc5a6b30076076b4f3ce3e692e0d79 (diff)
download: linux-0dd82141b236ce36253e3056c6068ee3d5732196.tar.gz
ocfs2_dlm: Add timeout to dlm join domain
Currently the ocfs2 dlm has no timeout when joining a dlm domain. While this
is not a problem in normal operation, it does become an issue if, say,
another node is refusing to let this node join the domain because of a stuck
recovery. This patch adds a 90-second timeout.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index e8ecf8c3dbe7..6087c4749fee 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1264,6 +1264,8 @@ bail:
 static int dlm_join_domain(struct dlm_ctxt *dlm)
 {
 	int status;
+	unsigned int backoff;
+	unsigned int total_backoff = 0;
 
 	BUG_ON(!dlm);
 
@@ -1295,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 	}
 
 	do {
-		unsigned int backoff;
 		status = dlm_try_to_join_domain(dlm);
 
 		/* If we're racing another node to the join, then we
 		 * need to back off temporarily and let them
 		 * complete. */
+#define	DLM_JOIN_TIMEOUT_MSECS	90000
 		if (status == -EAGAIN) {
 			if (signal_pending(current)) {
 				status = -ERESTARTSYS;
 				goto bail;
 			}
 
+			if (total_backoff >
+			    msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+				status = -ERESTARTSYS;
+				mlog(ML_NOTICE, "Timed out joining dlm domain "
+				     "%s after %u msecs\n", dlm->name,
+				     jiffies_to_msecs(total_backoff));
+				goto bail;
+			}
+
 			/*
 			 * <chip> After you!
 			 * <dale> No, after you!
@@ -1316,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
 			 */
 			backoff = (unsigned int)(jiffies & 0x3);
 			backoff *= DLM_DOMAIN_BACKOFF_MS;
+			total_backoff += backoff;
 			mlog(0, "backoff %d\n", backoff);
 			msleep(backoff);
 		}