forked from mindspore-Ecosystem/mindspore
Add ps module in batches
This commit is contained in:
parent
4bdd8e16a2
commit
4645a43e08
|
@ -1,4 +1,5 @@
|
||||||
file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||||
|
# Drop the ps util and scheduler sources from the globbed _PARALLEL_SRC_FILES list.
list(REMOVE_ITEM _PARALLEL_SRC_FILES "ps/util.cc" "ps/scheduler.cc")
|
||||||
if (ENABLE_DUMP_PROTO)
|
if (ENABLE_DUMP_PROTO)
|
||||||
list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
|
list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "parallel/ps/scheduler.h"
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "ps/ps.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace parallel {
|
||||||
|
namespace ps {
|
||||||
|
// Calls ::ps::Start(0) and then blocks the calling thread indefinitely.
// Nothing in this function breaks out of the loop, so it never returns.
void Scheduler::Run() {
  ::ps::Start(0);
  // Idle in one-second naps.
  for (;;) {
    sleep(1);
  }
}
|
||||||
|
} // namespace ps
|
||||||
|
} // namespace parallel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,40 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
||||||
|
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
||||||
|
namespace mindspore {
|
||||||
|
namespace parallel {
|
||||||
|
namespace ps {
|
||||||
|
// Scheduler object that can only be obtained through GetInstance():
// construction and destruction are private and copying is disabled.
class Scheduler {
 public:
  // Defined out of line.
  void Run();

  // Returns a reference to the one function-local static instance,
  // which is constructed the first time this is called.
  static Scheduler &GetInstance() {
    static Scheduler singleton;
    return singleton;
  }

 private:
  // No copies: there is exactly one instance.
  Scheduler(const Scheduler &) = delete;
  Scheduler &operator=(const Scheduler &) = delete;

  // Only GetInstance() may create or destroy the object.
  Scheduler() = default;
  ~Scheduler() = default;
};
|
||||||
|
} // namespace ps
|
||||||
|
} // namespace parallel
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_SCHEDULER_H_
|
|
@ -0,0 +1,128 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "parallel/ps/util.h"
|
||||||
|
#include <unordered_map>
|
||||||
|
#include "parallel/ps/common.h"
|
||||||
|
#include "common/utils.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace parallel {
|
||||||
|
namespace ps {
|
||||||
|
std::unordered_map<std::string, int> Util::optimizer_to_ids{
|
||||||
|
{kApplyMomentum, 0},
|
||||||
|
{kSparseAdam, 1},
|
||||||
|
{kSparseFtrl, 2},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unordered_map<int, std::string> Util::id_to_optimizers{
|
||||||
|
{0, kApplyMomentum},
|
||||||
|
{1, kSparseAdam},
|
||||||
|
{2, kSparseFtrl},
|
||||||
|
};
|
||||||
|
// True when the process role is worker, parameter server, or scheduler.
// The roles are probed in that order and probing stops at the first match.
bool Util::IsParamServerMode() {
  if (IsRoleOfWorker()) {
    return true;
  }
  if (IsRoleOfPServer()) {
    return true;
  }
  return IsRoleOfScheduler();
}
|
||||||
|
|
||||||
|
bool Util::IsRoleOfWorker() {
|
||||||
|
auto role = common::GetEnv(kEnvRole);
|
||||||
|
if (strcmp(role.c_str(), kEnvRoleOfWorker) == 0) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Util::IsRoleOfPServer() {
|
||||||
|
auto role = common::GetEnv(kEnvRole);
|
||||||
|
if (strcmp(role.c_str(), kEnvRoleOfPServer) == 0) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Util::IsRoleOfScheduler() {
|
||||||
|
auto role = common::GetEnv(kEnvRole);
|
||||||
|
if (strcmp(role.c_str(), kEnvRoleOfScheduler) == 0) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Util::SetInternalEnvVar() {
|
||||||
|
if (IsParamServerMode()) {
|
||||||
|
auto comm_type = common::GetEnv(kEnvCommType);
|
||||||
|
if (comm_type.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcCommType, comm_type.c_str());
|
||||||
|
}
|
||||||
|
auto interface = common::GetEnv(kEnvInterface);
|
||||||
|
if (interface.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcInterface, interface.c_str());
|
||||||
|
}
|
||||||
|
auto server_num = common::GetEnv(kEnvPServerNum);
|
||||||
|
if (server_num.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcPServerNum, server_num.c_str());
|
||||||
|
}
|
||||||
|
auto worker_num = common::GetEnv(kEnvWorkerNum);
|
||||||
|
if (worker_num.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcWorkerNum, worker_num.c_str());
|
||||||
|
}
|
||||||
|
if (IsRoleOfScheduler()) {
|
||||||
|
(void)common::SetEnv(kDmlcRole, kRoleOfScheduler);
|
||||||
|
} else if (IsRoleOfPServer()) {
|
||||||
|
(void)common::SetEnv(kDmlcRole, kRoleOfPServer);
|
||||||
|
} else if (IsRoleOfWorker()) {
|
||||||
|
(void)common::SetEnv(kDmlcRole, kRoleOfWorker);
|
||||||
|
}
|
||||||
|
auto scheduler_host = common::GetEnv(kEnvSchedulerHost);
|
||||||
|
if (scheduler_host.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcSchedulerHost, scheduler_host.c_str());
|
||||||
|
}
|
||||||
|
auto scheduler_port = common::GetEnv(kEnvSchedulerPort);
|
||||||
|
if (scheduler_port.size() > 0) {
|
||||||
|
(void)common::SetEnv(kDmlcSchedulerPort, scheduler_port.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int Util::optimizer_id(std::string name) {
|
||||||
|
if (optimizer_to_ids.count(name) > 0) {
|
||||||
|
return optimizer_to_ids[name];
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Util::optimizer_name(int id) {
|
||||||
|
if (id_to_optimizers.count(id) > 0) {
|
||||||
|
return id_to_optimizers[id];
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
// True when `name` is a key of optimizer_to_ids.
bool Util::is_optimizer(std::string name) {
  return optimizer_to_ids.find(name) != optimizer_to_ids.end();
}
|
||||||
|
|
||||||
|
// Returns how many of the `first_dim` leading entries belong to server `rank_id`
// when they are split across `server_num` servers.
//
// Every rank except the last gets round(first_dim / server_num), with halves rounded
// away from zero. The last rank (rank_id >= server_num - 1) gets whatever is left when
// first_dim is not an exact multiple of server_num.
//
// Returns 0 when server_num is not positive (the division would otherwise be undefined).
int Util::LocalShard(int first_dim, int rank_id, int server_num) {
  if (server_num <= 0) {
    return 0;
  }

  // Exact integer form of round(first_dim / server_num): add half of the divisor
  // (with the sign of the dividend) before the truncating division. Done in 64 bits
  // so that doubling first_dim cannot overflow, and without going through float,
  // which cannot represent every int exactly.
  const long long doubled_dim = 2LL * first_dim;
  const long long half_divisor = (doubled_dim >= 0) ? server_num : -static_cast<long long>(server_num);
  const int shard_size = static_cast<int>((doubled_dim + half_divisor) / (2LL * server_num));

  const bool divides_evenly = (first_dim % server_num) == 0;
  if (divides_evenly || rank_id < server_num - 1) {
    return shard_size;
  }

  const int last_shard = first_dim - shard_size * (server_num - 1);
  // Rounding up can hand the earlier ranks more than first_dim in total
  // (e.g. first_dim = 2, server_num = 4); the last rank then owns nothing rather
  // than a negative number of entries.
  if (first_dim >= 0 && last_shard < 0) {
    return 0;
  }
  return last_shard;
}
|
||||||
|
} // namespace ps
|
||||||
|
} // namespace parallel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,47 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
||||||
|
#define MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include "session/anf_runtime_algorithm.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace parallel {
|
||||||
|
namespace ps {
|
||||||
|
// Collection of static helpers for the ps module: role detection,
// environment forwarding, optimizer name/id lookup and shard sizing.
class Util {
 public:
  // Role queries.
  static bool IsParamServerMode();
  static bool IsRoleOfWorker();
  static bool IsRoleOfPServer();
  static bool IsRoleOfScheduler();

  // Environment setup.
  static void SetInternalEnvVar();

  // Optimizer name <-> id lookups.
  static int optimizer_id(std::string name);
  static std::string optimizer_name(int id);
  static bool is_optimizer(std::string name);

  // Shard size for one server rank.
  static int LocalShard(int first_dim, int rank_id, int server_num);

 private:
  // Lookup tables backing the optimizer helpers above.
  static std::unordered_map<std::string, int> optimizer_to_ids;
  static std::unordered_map<int, std::string> id_to_optimizers;
};
|
||||||
|
} // namespace ps
|
||||||
|
} // namespace parallel
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_MINDSPORE_CCSRC_PARALLEL_PS_UTIL_H_
|
|
@ -115,6 +115,8 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/dump_proto.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ir/lite/tensor.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
|
||||||
|
# Exclude the ps util source from MINDSPORE_SRC_LIST.
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/util.cc")
|
||||||
|
# Exclude the ps scheduler source from MINDSPORE_SRC_LIST.
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/parallel/ps/scheduler.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/anf_ir.pb.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/node_strategy.pb.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc")
|
||||||
|
|
|
@ -12,16 +12,7 @@ diff -Npur ps-lite-master/include/dmlc/base.h ps-lite-master-new/include/dmlc/ba
|
||||||
/*!
|
/*!
|
||||||
diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h
|
diff -Npur ps-lite-master/include/dmlc/logging.h ps-lite-master-new/include/dmlc/logging.h
|
||||||
--- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800
|
--- ps-lite-master/include/dmlc/logging.h 2020-02-29 13:59:55.000000000 +0800
|
||||||
+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-01 11:58:00.015919207 +0800
|
+++ ps-lite-master-new/include/dmlc/logging.h 2020-07-08 21:35:33.334584767 +0800
|
||||||
@@ -13,7 +13,7 @@
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <memory>
|
|
||||||
-#include "./base.h"
|
|
||||||
+//#include "./base.h"
|
|
||||||
|
|
||||||
#if DMLC_LOG_STACK_TRACE
|
|
||||||
#include <cxxabi.h>
|
|
||||||
@@ -52,7 +52,7 @@ struct Error : public std::runtime_error
|
@@ -52,7 +52,7 @@ struct Error : public std::runtime_error
|
||||||
|
|
||||||
namespace dmlc {
|
namespace dmlc {
|
||||||
|
|
Loading…
Reference in New Issue