!36107 Remove redundant TCP link reconnection operations

Merge pull request !36107 from chengang/bugfix_reduce_rpc_retry
This commit is contained in:
i-robot 2022-06-18 01:55:47 +00:00 committed by Gitee
commit f2dd16abe3
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 20 additions and 21 deletions

View File

@ -67,7 +67,8 @@ enum class MessageName {
}; };
// The retry and interval configuration used for the macro `EXECUTE_WITH_RETRY`. // The retry and interval configuration used for the macro `EXECUTE_WITH_RETRY`.
static const size_t kExecuteRetryNum = 60; static const size_t kExecuteRetryNum = 10;
static const size_t kNoRetry = 1;
static const uint32_t kExecuteInterval = 10; static const uint32_t kExecuteInterval = 10;
#define EXECUTE_WITH_RETRY(func, retry, interval, err_msg) \ #define EXECUTE_WITH_RETRY(func, retry, interval, err_msg) \

View File

@ -98,15 +98,15 @@ bool ComputeGraphNode::Finalize(bool force) {
bool ComputeGraphNode::Register() { bool ComputeGraphNode::Register() {
MS_EXCEPTION_IF_NULL(hb_client_); MS_EXCEPTION_IF_NULL(hb_client_);
const auto &server_url = meta_server_addr_.GetUrl(); const auto &server_url = meta_server_addr_.GetUrl();
RETURN_IF_FALSE_WITH_LOG(hb_client_->Disconnect(server_url), if (!hb_client_->IsConnected(server_url)) {
"Failed to disconnect from the meta server node url: " << server_url);
RETURN_IF_FALSE_WITH_LOG(hb_client_->Connect(server_url), RETURN_IF_FALSE_WITH_LOG(hb_client_->Connect(server_url),
"Failed to connect to the meta server node url: " << server_url); "Failed to connect to the meta server node url: " << server_url);
}
RETURN_IF_FALSE_WITH_LOG(tcp_client_->Disconnect(server_url), if (!tcp_client_->IsConnected(server_url)) {
"Failed to disconnect from the meta server node url: " << server_url);
RETURN_IF_FALSE_WITH_LOG(tcp_client_->Connect(server_url), RETURN_IF_FALSE_WITH_LOG(tcp_client_->Connect(server_url),
"Failed to connect to the meta server node url: " << server_url); "Failed to connect to the meta server node url: " << server_url);
}
RegistrationMessage reg_msg; RegistrationMessage reg_msg;
reg_msg.set_node_id(node_id_); reg_msg.set_node_id(node_id_);
@ -213,9 +213,8 @@ bool ComputeGraphNode::ReconnectIfNeeded(std::function<bool(void)> func, const s
if (!success) { if (!success) {
// Retry to reconnect to the meta server. // Retry to reconnect to the meta server.
MS_LOG(ERROR) << error; MS_LOG(ERROR) << error;
while (!Reconnect()) { sleep(kExecuteInterval);
continue; (void)Reconnect();
}
} }
} }
return success; return success;
@ -232,17 +231,16 @@ bool ComputeGraphNode::Reconnect() {
} }
// Reconnect to the meta server node. // Reconnect to the meta server node.
const size_t retry = 3; if (!tcp_client_->IsConnected(server_url)) {
size_t total_retry = retry; tcp_client_->Connect(server_url, kNoRetry);
const size_t connect_retry = retry;
while (!tcp_client_->IsConnected(server_url) && total_retry-- > 0) {
tcp_client_->Connect(server_url, connect_retry);
} }
total_retry = retry; if (!tcp_client_->IsConnected(server_url)) {
while (!hb_client_->IsConnected(server_url) && total_retry-- > 0) { return false;
hb_client_->Connect(server_url, connect_retry);
} }
return tcp_client_->IsConnected(server_url) && hb_client_->IsConnected(server_url); if (!hb_client_->IsConnected(server_url)) {
hb_client_->Connect(server_url, kNoRetry);
}
return hb_client_->IsConnected(server_url);
} }
bool ComputeGraphNode::SendMessageToMSN(const std::string msg_name, const std::string &msg_body, bool sync) { bool ComputeGraphNode::SendMessageToMSN(const std::string msg_name, const std::string &msg_body, bool sync) {