ocfs2: special case recovery lock in dlmlock_remote()
If the previous master of the recovery lock dies, let calc_usage take it down completely and let the caller completely redo the dlmlock() call. Otherwise, there will never be an opportunity to remaster the lockres, and recovery won't be able to progress. Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
This commit is contained in:
parent
36407488b1
commit
c8df412e1c
|
@ -227,7 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
|
||||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||||
lock->lock_pending = 0;
|
lock->lock_pending = 0;
|
||||||
if (status != DLM_NORMAL) {
|
if (status != DLM_NORMAL) {
|
||||||
if (status != DLM_NOTQUEUED) {
|
if (status == DLM_RECOVERING &&
|
||||||
|
dlm_is_recovery_lock(res->lockname.name,
|
||||||
|
res->lockname.len)) {
|
||||||
|
/* recovery lock was mastered by dead node.
|
||||||
|
* we need to have calc_usage shoot down this
|
||||||
|
* lockres and completely remaster it. */
|
||||||
|
mlog(0, "%s: recovery lock was owned by "
|
||||||
|
"dead node %u, remaster it now.\n",
|
||||||
|
dlm->name, res->owner);
|
||||||
|
} else if (status != DLM_NOTQUEUED) {
|
||||||
/*
|
/*
|
||||||
* DO NOT call calc_usage, as this would unhash
|
* DO NOT call calc_usage, as this would unhash
|
||||||
* the remote lockres before we ever get to use
|
* the remote lockres before we ever get to use
|
||||||
|
@ -691,18 +700,22 @@ retry_lock:
|
||||||
msleep(100);
|
msleep(100);
|
||||||
/* no waiting for dlm_reco_thread */
|
/* no waiting for dlm_reco_thread */
|
||||||
if (recovery) {
|
if (recovery) {
|
||||||
if (status == DLM_RECOVERING) {
|
if (status != DLM_RECOVERING)
|
||||||
mlog(0, "%s: got RECOVERING "
|
goto retry_lock;
|
||||||
"for $REOCVERY lock, master "
|
|
||||||
"was %u\n", dlm->name,
|
mlog(0, "%s: got RECOVERING "
|
||||||
res->owner);
|
"for $RECOVERY lock, master "
|
||||||
dlm_wait_for_node_death(dlm, res->owner,
|
"was %u\n", dlm->name,
|
||||||
DLM_NODE_DEATH_WAIT_MAX);
|
res->owner);
|
||||||
}
|
/* wait to see the node go down, then
|
||||||
|
* drop down and allow the lockres to
|
||||||
|
* get cleaned up. need to remaster. */
|
||||||
|
dlm_wait_for_node_death(dlm, res->owner,
|
||||||
|
DLM_NODE_DEATH_WAIT_MAX);
|
||||||
} else {
|
} else {
|
||||||
dlm_wait_for_recovery(dlm);
|
dlm_wait_for_recovery(dlm);
|
||||||
|
goto retry_lock;
|
||||||
}
|
}
|
||||||
goto retry_lock;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (status != DLM_NORMAL) {
|
if (status != DLM_NORMAL) {
|
||||||
|
|
|
@ -2314,6 +2314,10 @@ again:
|
||||||
mlog(0, "%s: reco master %u is ready to recover %u\n",
|
mlog(0, "%s: reco master %u is ready to recover %u\n",
|
||||||
dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
|
dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
|
||||||
status = -EEXIST;
|
status = -EEXIST;
|
||||||
|
} else if (ret == DLM_RECOVERING) {
|
||||||
|
mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
|
||||||
|
dlm->name, dlm->node_num);
|
||||||
|
goto again;
|
||||||
} else {
|
} else {
|
||||||
struct dlm_lock_resource *res;
|
struct dlm_lock_resource *res;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue