!18617 fixed log error

Merge pull request !18617 from anancds/ssl
This commit is contained in:
i-robot 2021-06-22 06:56:02 +00:00 committed by Gitee
commit 08f7e99c9e
7 changed files with 22 additions and 4 deletions

View File

@ -544,7 +544,11 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
// address, so that the tensor's device address context is not overwritten in the next step or next loop.
// However, one-time memory allocation scenarios must be skipped, because their memory is not allocated in the next step:
// 1. Non cnode 2. Communication kernel.
if (node->isa<CNode>() && !AnfAlgo::IsCommunicationOp(node) && !ps::PSContext::instance()->is_ps_mode()) {
bool ps_mode = false;
#if (ENABLE_CPU && !_WIN32)
ps_mode = ps::PSContext::instance()->is_ps_mode();
#endif
if (node->isa<CNode>() && !AnfAlgo::IsCommunicationOp(node) && !ps_mode) {
auto new_address = std::make_shared<device::gpu::GPUDeviceAddress>(nullptr, address->GetSize());
AnfAlgo::SetOutputAddr(new_address, output_index, node.get());
if (context::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {

View File

@ -44,6 +44,14 @@ namespace core {
// Destructor: calls Stop() when the HttpServer object is destroyed.
HttpServer::~HttpServer() { Stop(); }
bool HttpServer::InitServer() {
if (server_address_ == "") {
MS_LOG(INFO) << "The server ip is empty.";
std::string interface;
std::string server_ip;
CommUtil::GetAvailableInterfaceAndIP(&interface, &server_ip);
server_address_ = server_ip;
}
if (!CommUtil::CheckIp(server_address_)) {
MS_LOG(ERROR) << "The http server ip:" << server_address_ << " is illegal!";
return false;

View File

@ -21,7 +21,7 @@ namespace ps {
namespace core {
bool FileConfiguration::Initialize() {
if (!CommUtil::IsFileExists(file_path_)) {
MS_LOG(ERROR) << "The file path:" << file_path_ << " is not exist.";
MS_LOG(INFO) << "The file path:" << file_path_ << " is not exist.";
return false;
}

View File

@ -25,6 +25,10 @@ uint32_t Node::rank_id() const { return node_info_.rank_id_; }
// Returns the role stored in node_info_.
NodeRole Node::role() const { return node_info_.node_role_; }
// Returns the port stored in node_info_.
uint16_t Node::BoundPort() const { return node_info_.port_; }
// Returns a copy of the IP string stored in node_info_.
std::string Node::BoundIp() const { return node_info_.ip_; }
bool Node::WaitForStart(const uint32_t &timeout) {
std::unique_lock<std::mutex> lock(wait_start_mutex_);
bool res = wait_start_cond_.wait_for(lock, std::chrono::seconds(timeout), [&] {

View File

@ -64,6 +64,8 @@ class Node {
std::string node_id() const;
uint32_t rank_id() const;
NodeRole role() const;
uint16_t BoundPort() const;
std::string BoundIp() const;
bool Wait(uint64_t request_id, const uint32_t &timeout = kCommTimeoutInSeconds);

View File

@ -36,7 +36,7 @@ enum class ClusterEvent {
};
struct NodeInfo {
NodeInfo() : port_(0), node_role_(NodeRole::SCHEDULER), rank_id_(0), is_alive(false) {}
NodeInfo() : ip_(""), port_(0), node_role_(NodeRole::SCHEDULER), rank_id_(0), is_alive(false) {}
// ip
std::string ip_;
// the port of this node

View File

@ -154,7 +154,7 @@ bool Server::InitCommunicatorWithWorker() {
communicators_with_worker_.push_back(tcp_comm);
}
if (use_http_) {
auto http_comm = server_node_->GetOrCreateHttpComm("0.0.0.0", http_port_, task_executor_);
auto http_comm = server_node_->GetOrCreateHttpComm(server_node_->BoundIp(), http_port_, task_executor_);
MS_EXCEPTION_IF_NULL(http_comm);
communicators_with_worker_.push_back(http_comm);
}