Add ps module in batches

This commit is contained in:
ZPaC 2020-07-09 11:12:35 +08:00
parent 4bdd8e16a2
commit 4645a43e08
7 changed files with 251 additions and 10 deletions

View File

@ -1,4 +1,5 @@
file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc" "ps/scheduler.cc")
if (ENABLE_DUMP_PROTO) if (ENABLE_DUMP_PROTO)
list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
endif () endif ()

View File

@ -0,0 +1,32 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "parallel/ps/scheduler.h"
#include <unistd.h>
#include "ps/ps.h"
namespace mindspore {
namespace parallel {
namespace ps {
// Calls ::ps::Start(0) once and then parks the calling thread in an endless
// one-second sleep loop. This function never returns.
void Scheduler::Run() {
  ::ps::Start(0);
  for (;;) {
    sleep(1);
  }
}
} // namespace ps
} // namespace parallel
} // namespace mindspore

View File

@ -0,0 +1,40 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
namespace mindspore {
namespace parallel {
namespace ps {
// Singleton entry point for the scheduler role. The only way to obtain a
// Scheduler is GetInstance(); construction, destruction and copying are all
// unavailable to outside code.
class Scheduler {
 public:
  // Copying is disabled so that GetInstance() stays the sole source of the object.
  Scheduler(const Scheduler &) = delete;
  Scheduler &operator=(const Scheduler &) = delete;

  // Returns the one Scheduler object, which is a function-local static and is
  // therefore created the first time this is called.
  static Scheduler &GetInstance() {
    static Scheduler scheduler;
    return scheduler;
  }

  // Declared here, defined out of line.
  void Run();

 private:
  Scheduler() = default;
  ~Scheduler() = default;
};
} // namespace ps
} // namespace parallel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_

View File

@ -0,0 +1,128 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "parallel/ps/util.h"
#include <cmath>
#include <cstring>
#include <unordered_map>
#include "parallel/ps/common.h"
#include "common/utils.h"
namespace mindspore {
namespace parallel {
namespace ps {
std::unordered_map<std::string, int> Util::optimizer_to_ids{
{kApplyMomentum, 0},
{kSparseAdam, 1},
{kSparseFtrl, 2},
};
std::unordered_map<int, std::string> Util::id_to_optimizers{
{0, kApplyMomentum},
{1, kSparseAdam},
{2, kSparseFtrl},
};
// True when this process has any of the three roles: worker, parameter server
// or scheduler. The roles are probed in that order and probing stops at the
// first match.
bool Util::IsParamServerMode() {
  if (IsRoleOfWorker()) {
    return true;
  }
  if (IsRoleOfPServer()) {
    return true;
  }
  return IsRoleOfScheduler();
}
// True when the value read from the environment variable named by kEnvRole
// compares equal (strcmp) to kEnvRoleOfWorker.
bool Util::IsRoleOfWorker() {
  const auto current_role = common::GetEnv(kEnvRole);
  return strcmp(current_role.c_str(), kEnvRoleOfWorker) == 0;
}
// True when the value read from the environment variable named by kEnvRole
// compares equal (strcmp) to kEnvRoleOfPServer.
bool Util::IsRoleOfPServer() {
  const auto current_role = common::GetEnv(kEnvRole);
  return strcmp(current_role.c_str(), kEnvRoleOfPServer) == 0;
}
// True when the value read from the environment variable named by kEnvRole
// compares equal (strcmp) to kEnvRoleOfScheduler.
bool Util::IsRoleOfScheduler() {
  const auto current_role = common::GetEnv(kEnvRole);
  return strcmp(current_role.c_str(), kEnvRoleOfScheduler) == 0;
}
void Util::SetInternalEnvVar() {
if (IsParamServerMode()) {
auto comm_type = common::GetEnv(kEnvCommType);
if (comm_type.size() > 0) {
(void)common::SetEnv(kDmlcCommType, comm_type.c_str());
}
auto interface = common::GetEnv(kEnvInterface);
if (interface.size() > 0) {
(void)common::SetEnv(kDmlcInterface, interface.c_str());
}
auto server_num = common::GetEnv(kEnvPServerNum);
if (server_num.size() > 0) {
(void)common::SetEnv(kDmlcPServerNum, server_num.c_str());
}
auto worker_num = common::GetEnv(kEnvWorkerNum);
if (worker_num.size() > 0) {
(void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str());
}
if (IsRoleOfScheduler()) {
(void)common::SetEnv(kDmlcRole, kRoleOfScheduler);
} else if (IsRoleOfPServer()) {
(void)common::SetEnv(kDmlcRole, kRoleOfPServer);
} else if (IsRoleOfWorker()) {
(void)common::SetEnv(kDmlcRole, kRoleOfWorker);
}
auto scheduler_host = common::GetEnv(kEnvSchedulerHost);
if (scheduler_host.size() > 0) {
(void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str());
}
auto scheduler_port = common::GetEnv(kEnvSchedulerPort);
if (scheduler_port.size() > 0) {
(void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str());
}
}
}
// Looks up the numeric id registered for optimizer `name`.
// Returns -1 when `name` is not in optimizer_to_ids.
int Util::optimizer_id(std::string name) {
  const auto entry = optimizer_to_ids.find(name);
  return entry == optimizer_to_ids.end() ? -1 : entry->second;
}
// Looks up the optimizer name registered for `id`.
// Returns an empty string when `id` is not in id_to_optimizers.
std::string Util::optimizer_name(int id) {
  const auto entry = id_to_optimizers.find(id);
  if (entry == id_to_optimizers.end()) {
    return "";
  }
  return entry->second;
}
// True when `name` is one of the keys of optimizer_to_ids.
bool Util::is_optimizer(std::string name) {
  return optimizer_to_ids.find(name) != optimizer_to_ids.end();
}
// Returns how many entries of a dimension of size `first_dim` belong to the
// server with rank `rank_id` when the dimension is split over `server_num`
// servers.
//
// The nominal shard is round(first_dim / server_num). When `first_dim` divides
// evenly, every rank gets the nominal shard. Otherwise every rank below
// `server_num - 1` gets the nominal shard and any other rank gets whatever is
// left: first_dim - nominal * (server_num - 1).
//
// A non-positive `server_num` describes no valid split; 0 is returned instead
// of dividing by zero.
//
// NOTE(review): because the nominal shard is rounded to nearest, the leftover
// for the last rank can be zero or negative (first_dim = 2, server_num = 4
// yields -1 for rank 3). Confirm callers never use such combinations.
int Util::LocalShard(int first_dim, int rank_id, int server_num) {
  if (server_num <= 0) {
    return 0;
  }
  // Divide in double: float only has a 24-bit significand, so an int above
  // 2^24 would be rounded before the division and produce a wrong shard size.
  const int shard_size = static_cast<int>(std::round(static_cast<double>(first_dim) / server_num));
  const bool evenly_divided = (first_dim % server_num) == 0;
  if (evenly_divided || rank_id < server_num - 1) {
    return shard_size;
  }
  return first_dim - shard_size * (server_num - 1);
}
} // namespace ps
} // namespace parallel
} // namespace mindspore

View File

@ -0,0 +1,47 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
#include <map>
#include <string>
#include <unordered_map>
#include "session/anf_runtime_algorithm.h"
namespace mindspore {
namespace parallel {
namespace ps {
// Collection of static helpers for parameter-server mode: role detection from
// the environment, forwarding of settings to the kDmlc* environment variables,
// optimizer name/id lookup, and shard sizing. All members are static.
class Util {
public:
// True when the process role is worker, parameter server or scheduler.
static bool IsParamServerMode();
// True when the environment variable named by kEnvRole equals kEnvRoleOfWorker.
static bool IsRoleOfWorker();
// True when the environment variable named by kEnvRole equals kEnvRoleOfPServer.
static bool IsRoleOfPServer();
// True when the environment variable named by kEnvRole equals kEnvRoleOfScheduler.
static bool IsRoleOfScheduler();
// When IsParamServerMode() is true, copies each non-empty kEnv* setting into
// its kDmlc* environment variable and sets kDmlcRole according to the detected
// role. Otherwise does nothing.
static void SetInternalEnvVar();
// Returns the id registered for optimizer `name`, or -1 when it is unknown.
static int optimizer_id(std::string name);
// Returns the optimizer name registered for `id`, or "" when it is unknown.
static std::string optimizer_name(int id);
// True when `name` is a registered optimizer name.
static bool is_optimizer(std::string name);
// Returns the share of `first_dim` assigned to server `rank_id` out of
// `server_num` servers: round(first_dim / server_num) when first_dim divides
// evenly or rank_id < server_num - 1, otherwise first_dim minus that rounded
// share times (server_num - 1).
static int LocalShard(int first_dim, int rank_id, int server_num);
private:
// Optimizer name -> id.
static std::unordered_map<std::string, int> optimizer_to_ids;
// Id -> optimizer name; the reverse of optimizer_to_ids.
static std::unordered_map<int, std::string> id_to_optimizers;
};
} // namespace ps
} // namespace parallel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_

View File

@ -115,6 +115,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/util.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/scheduler.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")

View File

@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba
/*! /*!
diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h
--- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800 --- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800
+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800 +++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800
@@ -13,7 +13,7 @@
#include <string>
#include <vector>
#include <memory>
-#include "./base.h"
+//#include "./base.h"
#if DMLC_LOG_STACK_TRACE
#include <cxxabi.h>
@@ -52,7 +52,7 @@ struct Error : public std::runtime_error @@ -52,7 +52,7 @@ struct Error : public std::runtime_error
namespace dmlc { namespace dmlc {