ocfs2: print node # when tcp fails

Print the node number of the peer node when sending a message to it fails.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Author:    Wengang Wang <wen.gang.wang@oracle.com>
Date:      2010-03-30 12:09:22 +08:00
Committer: Joel Becker <joel.becker@oracle.com>
Parent:    83f92318fa
Commit:    a5196ec5ef

7 changed files with 51 additions and 22 deletions
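
Every hunk below applies the same change: a bare mlog_errno() (or a mlog(0, ...) debug line) is replaced by an ML_ERROR message naming the message type that failed, the domain key, and the target node. As a minimal sketch of the pattern (the helper below is illustrative only; the commit inlines the mlog() call at each send site rather than adding a helper):

#include <linux/types.h>
#include "cluster/masklog.h"	/* mlog() and ML_ERROR */

/* Hypothetical helper showing the logging pattern this commit inlines
 * at every send site.  msg_type, key and node mirror the arguments
 * already passed to o2net_send_message() at that site. */
static inline void dlm_log_send_error(int err, u32 msg_type, u32 key, u8 node)
{
	if (err < 0)
		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
		     "node %u\n", err, msg_type, key, node);
}

Note that at sites which go on to test dlm_is_host_down(), the error is now logged before that check, so even a routine host-down failure reports which node was unreachable.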

fs/ocfs2/dlm/dlmast.c

@@ -453,7 +453,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
 				     lock->ml.node, &status);
 	if (ret < 0)
-		mlog_errno(ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
+		     lock->ml.node);
 	else {
 		if (status == DLM_RECOVERING) {
 			mlog(ML_ERROR, "sent AST to node %u, it thinks this "

fs/ocfs2/dlm/dlmconvert.c

@@ -391,7 +391,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
 		} else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
 			dlm_error(ret);
 	} else {
-		mlog_errno(tmpret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
+		     res->owner);
 		if (dlm_is_host_down(tmpret)) {
 			/* instead of logging the same network error over
 			 * and over, sleep here and wait for the heartbeat

fs/ocfs2/dlm/dlmdomain.c

@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
 	status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
 				    &leave_msg, sizeof(leave_msg), node,
 				    NULL);
-
+	if (status < 0)
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
 	mlog(0, "status return %d from o2net_send_message\n", status);
 
 	return status;
@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
 				    &cancel_msg, sizeof(cancel_msg), node,
 				    NULL);
 	if (status < 0) {
-		mlog_errno(status);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
+		     node);
 		goto bail;
 	}
 
@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 	byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
 
 	status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
-				    sizeof(join_msg), node,
-				    &join_resp);
+				    sizeof(join_msg), node, &join_resp);
 	if (status < 0 && status != -ENOPROTOOPT) {
-		mlog_errno(status);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
+		     node);
 		goto bail;
 	}
 	dlm_query_join_wire_to_packet(join_resp, &packet);
@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
 				    &assert_msg, sizeof(assert_msg), node,
 				    NULL);
 	if (status < 0)
-		mlog_errno(status);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
+		     node);
 
 	return status;
 }

fs/ocfs2/dlm/dlmlock.c

@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
 			BUG();
 		}
 	} else {
-		mlog_errno(tmpret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
+		     res->owner);
 		if (dlm_is_host_down(tmpret)) {
 			ret = DLM_RECOVERING;
 			mlog(0, "node %u died so returning DLM_RECOVERING "

fs/ocfs2/dlm/dlmmaster.c

@@ -1666,7 +1666,9 @@ again:
 	tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
 				    &assert, sizeof(assert), to, &r);
 	if (tmpret < 0) {
-		mlog(0, "assert_master returned %d!\n", tmpret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", tmpret,
+		     DLM_ASSERT_MASTER_MSG, dlm->key, to);
 		if (!dlm_is_host_down(tmpret)) {
 			mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
 			BUG();
@@ -2207,7 +2209,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
 				 &deref, sizeof(deref), res->owner, &r);
 	if (ret < 0)
-		mlog_errno(ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+		     "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
+		     res->owner);
 	else if (r < 0) {
 		/* BAD. other node says I did not have a ref. */
 		mlog(ML_ERROR,"while dropping ref on %s:%.*s "
@@ -2977,7 +2981,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
 				 &migrate, sizeof(migrate), nodenum,
 				 &status);
 	if (ret < 0) {
-		mlog(0, "migrate_request returned %d!\n", ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
+		     dlm->key, nodenum);
 		if (!dlm_is_host_down(ret)) {
 			mlog(ML_ERROR, "unhandled error=%d!\n", ret);
 			BUG();

fs/ocfs2/dlm/dlmrecovery.c

@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
 
 	/* negative status is handled by caller */
 	if (ret < 0)
-		mlog_errno(ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
+		     dlm->key, request_from);
 
 	// return from here, then
 	// sleep until all received or error
@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
 				 sizeof(done_msg), send_to, &tmpret);
 	if (ret < 0) {
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
+		     dlm->key, send_to);
 		if (!dlm_is_host_down(ret)) {
-			mlog_errno(ret);
-			mlog(ML_ERROR, "%s: unknown error sending data-done "
-			     "to %u\n", dlm->name, send_to);
 			BUG();
 		}
 	} else
@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 	if (ret < 0) {
 		/* XXX: negative status is not handled.
 		 * this will end up killing this node. */
-		mlog_errno(ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
+		     dlm->key, send_to);
 	} else {
 		/* might get an -ENOMEM back here */
 		ret = status;
@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 				 &req, sizeof(req), nodenum, &status);
 	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
-		mlog_errno(ret);
+		mlog(ML_ERROR, "Error %d when sending message %u (key "
+		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
+		     dlm->key, nodenum);
 	else {
 		BUG_ON(status < 0);
 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -2640,7 +2646,7 @@ retry:
 		if (dlm_is_host_down(ret)) {
 			/* node is down.  not involved in recovery
 			 * so just keep going */
-			mlog(0, "%s: node %u was down when sending "
+			mlog(ML_NOTICE, "%s: node %u was down when sending "
 			     "begin reco msg (%d)\n", dlm->name, nodenum, ret);
 			ret = 0;
 		}
@@ -2660,6 +2666,7 @@ retry:
 		}
 		if (ret < 0) {
 			struct dlm_lock_resource *res;
+
 			/* this is now a serious problem, possibly ENOMEM
 			 * in the network stack.  must retry */
 			mlog_errno(ret);
@@ -2789,7 +2796,9 @@ stage2:
 		if (ret >= 0)
 			ret = status;
 		if (ret < 0) {
-			mlog_errno(ret);
+			mlog(ML_ERROR, "Error %d when sending message %u (key "
+			     "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
+			     dlm->key, nodenum);
 			if (dlm_is_host_down(ret)) {
 				/* this has no effect on this recovery
 				 * session, so set the status to zero to

fs/ocfs2/dlm/dlmunlock.c

@@ -355,7 +355,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
 			mlog(0, "master was in-progress.  retry\n");
 			ret = status;
 		} else {
-			mlog_errno(tmpret);
+			mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
+			     "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
 			if (dlm_is_host_down(tmpret)) {
 				/* NOTE: this seems strange, but it is what we want.
 				 * when the master goes down during a cancel or