Add ps module in batches
This commit is contained in:
parent
4bdd8e16a2
commit
4645a43e08
|
@ -1,4 +1,5 @@
|
|||
# Collect every .cc file below this directory, as paths relative to it.
file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")

# Drop the parameter-server sources from the globbed list.
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc")
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/scheduler.cc")

# The strategy checkpoint source is additionally dropped when ENABLE_DUMP_PROTO is set.
if (ENABLE_DUMP_PROTO)
    list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
endif ()
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "parallel/ps/scheduler.h"
|
||||
#include <unistd.h>
|
||||
#include "ps/ps.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
namespace ps {
|
||||
// Calls ::ps::Start(0) and then blocks the calling thread forever.
// This function never returns.
void Scheduler::Run() {
  ::ps::Start(0);
  // Park the calling thread indefinitely, waking up once per second.
  for (;;) {
    sleep(1);
  }
}
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,40 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
namespace ps {
|
||||
// Singleton wrapper around the scheduler entry point. The only way to obtain
// an object is GetInstance(); construction, destruction and copying are not
// available to outside code.
class Scheduler {
 public:
  // Copying is disabled so the single instance cannot be duplicated.
  Scheduler(const Scheduler &) = delete;
  Scheduler &operator=(const Scheduler &) = delete;

  // Returns a reference to the one function-local static instance.
  static Scheduler &GetInstance() {
    static Scheduler singleton;
    return singleton;
  }

  // Scheduler entry point; defined out of line.
  void Run();

 private:
  Scheduler() = default;
  ~Scheduler() = default;
};
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
|
@ -0,0 +1,128 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "parallel/ps/util.h"
|
||||
#include <unordered_map>
|
||||
#include "parallel/ps/common.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
namespace ps {
|
||||
std::unordered_map<std::string, int> Util::optimizer_to_ids{
|
||||
{kApplyMomentum, 0},
|
||||
{kSparseAdam, 1},
|
||||
{kSparseFtrl, 2},
|
||||
};
|
||||
|
||||
std::unordered_map<int, std::string> Util::id_to_optimizers{
|
||||
{0, kApplyMomentum},
|
||||
{1, kSparseAdam},
|
||||
{2, kSparseFtrl},
|
||||
};
|
||||
// Returns true when this process has any of the three roles (worker, parameter
// server or scheduler). The checks run in that order and stop at the first hit.
bool Util::IsParamServerMode() {
  if (IsRoleOfWorker()) {
    return true;
  }
  if (IsRoleOfPServer()) {
    return true;
  }
  return IsRoleOfScheduler();
}
|
||||
|
||||
bool Util::IsRoleOfWorker() {
|
||||
auto role = common::GetEnv(kEnvRole);
|
||||
if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Util::IsRoleOfPServer() {
|
||||
auto role = common::GetEnv(kEnvRole);
|
||||
if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Util::IsRoleOfScheduler() {
|
||||
auto role = common::GetEnv(kEnvRole);
|
||||
if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Util::SetInternalEnvVar() {
|
||||
if (IsParamServerMode()) {
|
||||
auto comm_type = common::GetEnv(kEnvCommType);
|
||||
if (comm_type.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcCommType, comm_type.c_str());
|
||||
}
|
||||
auto interface = common::GetEnv(kEnvInterface);
|
||||
if (interface.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcInterface, interface.c_str());
|
||||
}
|
||||
auto server_num = common::GetEnv(kEnvPServerNum);
|
||||
if (server_num.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcPServerNum, server_num.c_str());
|
||||
}
|
||||
auto worker_num = common::GetEnv(kEnvWorkerNum);
|
||||
if (worker_num.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str());
|
||||
}
|
||||
if (IsRoleOfScheduler()) {
|
||||
(void)common::SetEnv(kDmlcRole, kRoleOfScheduler);
|
||||
} else if (IsRoleOfPServer()) {
|
||||
(void)common::SetEnv(kDmlcRole, kRoleOfPServer);
|
||||
} else if (IsRoleOfWorker()) {
|
||||
(void)common::SetEnv(kDmlcRole, kRoleOfWorker);
|
||||
}
|
||||
auto scheduler_host = common::GetEnv(kEnvSchedulerHost);
|
||||
if (scheduler_host.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str());
|
||||
}
|
||||
auto scheduler_port = common::GetEnv(kEnvSchedulerPort);
|
||||
if (scheduler_port.size() > 0) {
|
||||
(void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Util::optimizer_id(std::string name) {
|
||||
if (optimizer_to_ids.count(name) > 0) {
|
||||
return optimizer_to_ids[name];
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string Util::optimizer_name(int id) {
|
||||
if (id_to_optimizers.count(id) > 0) {
|
||||
return id_to_optimizers[id];
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
// Returns true when `name` is a key of optimizer_to_ids.
bool Util::is_optimizer(std::string name) {
  return optimizer_to_ids.find(name) != optimizer_to_ids.end();
}
|
||||
|
||||
// Computes how many of `first_dim` rows belong to the server with rank `rank_id`
// when the rows are split across `server_num` servers.
//
// The per-server size is first_dim / server_num rounded to the nearest integer
// (std::round, halves away from zero). Every rank below server_num - 1 gets
// that size; when first_dim is not a multiple of server_num, the remaining
// ranks get whatever is left after server_num - 1 full shards.
//
// NOTE(review): server_num == 0 is not guarded here (integer modulo by zero).
// NOTE(review): when the rounded size overshoots, the leftover can be negative,
// e.g. first_dim = 2, server_num = 4 gives 2 - 1 * 3 = -1. Confirm callers never
// pass such shapes before changing the formula, since other components
// presumably rely on the same split.
int Util::LocalShard(int first_dim, int rank_id, int server_num) {
  const int leftover = first_dim % server_num;
  const int per_server = std::round((static_cast<float>(first_dim)) / server_num);
  const bool past_regular_ranks = rank_id >= server_num - 1;
  if (leftover != 0 && past_regular_ranks) {
    return first_dim - (per_server * (server_num - 1));
  }
  return per_server;
}
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
namespace ps {
|
||||
// Static helpers for parameter-server mode: role detection from the
// environment, environment-variable forwarding, optimizer name/id lookup and
// shard-size computation. All members are static.
class Util {
  // Lookup tables between optimizer names and integer ids; defined out of line.
  static std::unordered_map<std::string, int> optimizer_to_ids;
  static std::unordered_map<int, std::string> id_to_optimizers;

 public:
  // True when the process role is worker, parameter server or scheduler.
  static bool IsParamServerMode();

  // Role predicates.
  static bool IsRoleOfWorker();
  static bool IsRoleOfPServer();
  static bool IsRoleOfScheduler();

  // Forwards the parameter-server environment settings to their internal
  // counterparts; does nothing outside parameter-server mode.
  static void SetInternalEnvVar();

  // Returns the id registered for `name`, or -1 when it is not registered.
  static int optimizer_id(std::string name);

  // Returns the name registered for `id`, or an empty string when it is not registered.
  static std::string optimizer_name(int id);

  // True when `name` is a registered optimizer name.
  static bool is_optimizer(std::string name);

  // Size of the shard of `first_dim` rows held by server `rank_id` out of `server_num`.
  static int LocalShard(int first_dim, int rank_id, int server_num);
};
|
||||
} // namespace ps
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
|
@ -115,6 +115,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/util.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/scheduler.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc")
|
||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
|
||||
|
|
|
@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba
|
|||
/*!
|
||||
diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h
|
||||
--- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800
|
||||
+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800
|
||||
@@ -13,7 +13,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
-#include "./base.h"
|
||||
+//#include "./base.h"
|
||||
|
||||
#if DMLC_LOG_STACK_TRACE
|
||||
#include <cxxabi.h>
|
||||
+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800
|
||||
@@ -52,7 +52,7 @@ struct Error : public std::runtime_error
|
||||
|
||||
namespace dmlc {
|
||||
|
|
Loading…
Reference in New Issue