forked from mindspore-Ecosystem/mindspore
Purge old ps arch
This commit is contained in:
parent c3ec281ec2
commit 3bb04bb3f3
@@ -545,7 +545,6 @@ if(ENABLE_TEST OR ENABLE_TESTCASES)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ge/ge_operator_stub.cc)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/transform/util.cc)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/pipeline/action_stub.cc)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_stub.cc)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/cluster/cluster_stub.cc)
    list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/profiling/parallel_strategy_profiling_stub.cc)
@@ -562,7 +561,6 @@ endif()
if(NOT ENABLE_TESTCASES AND NOT (ENABLE_D OR ENABLE_CPU OR ENABLE_GPU))
    include_directories(${CMAKE_BINARY_DIR})
    list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/cluster/cluster_stub.cc)
    list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_stub.cc)
    list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/fl/fl_stub.cc)
    list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/fl/server_stub.cc)
    list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_core_stub.cc)
@@ -48,7 +48,7 @@
#include "include/common/utils/parallel_context.h"
#include "kernel/oplib/oplib.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/ps_context.h"
@@ -546,16 +546,6 @@ void GetNodeUsedList(const FuncGraphPtr &kernel_graph, const AnfNodePtr &node,
    }
  }
}

// Check whether the Parameter initialized in server is used by the operator executed on the device side.
bool UseParamInitInServer(const FuncGraphPtr &kernel_graph, const AnfNodePtr &param_node) {
  std::vector<AnfNodePtr> node_users_list;
  GetNodeUsedList(kernel_graph, param_node, &node_users_list);

  // Check if there is a real CNode among all users of the node.
  return std::any_of(node_users_list.begin(), node_users_list.end(),
                     [](const AnfNodePtr &node) { return AnfUtils::IsRealKernel(node); });
}
#endif

void IterateFindTensor(std::vector<ValuePtr> *msTensors, const VectorRef &ref_list) {
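Note: the removed UseParamInitInServer is the standard "collect users, then std::any_of" graph query. A minimal self-contained sketch of the same pattern, with a hypothetical Node type standing in for AnfNodePtr and AnfUtils::IsRealKernel:

#include <algorithm>
#include <memory>
#include <vector>

// Hypothetical stand-ins for AnfNodePtr / AnfUtils::IsRealKernel.
struct Node {
  bool is_real_kernel = false;
};
using NodePtr = std::shared_ptr<Node>;

// Returns true if any user of the parameter is a real compute kernel,
// mirroring the any_of predicate in the removed UseParamInitInServer.
bool UsedByRealKernel(const std::vector<NodePtr> &users) {
  return std::any_of(users.begin(), users.end(),
                     [](const NodePtr &n) { return n && n->is_real_kernel; });
}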
@@ -3062,131 +3052,6 @@ void SessionBasic::DumpGraphs(const std::vector<KernelGraphPtr> &graphs) const {
}

void SessionBasic::UnifyMindIR(const KernelGraphPtr &graph) { opt::CommonUnifyMindIR(graph); }

#ifdef WITH_BACKEND
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) const {
  if (!ps::PSContext::instance()->is_worker()) {
    return;
  }

  // Check whether the Parameter initialized in server is used by the operator executed on the device side.
  CheckPSModeConsistence(kernel_graph);

  if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
    if (!ps::ps_cache_instance.initialized_ps_cache()) {
      auto context_ptr = MsContext::GetInstance();
      MS_EXCEPTION_IF_NULL(context_ptr);
      auto device_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
      auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(device_target, device_id_);
      MS_EXCEPTION_IF_NULL(runtime_instance);
      auto context = runtime_instance->context();
      const auto &kernels = kernel_graph->execution_order();
      if (kernels.size() > 0 && common::AnfAlgo::GetCNodeName(kernels[0]) == "InitDataSetQueue") {
        GetBatchElements(kernels[0]);
        ps::ps_cache_instance.Initialize();
      }
      ps::ps_cache_instance.DoProcessData(device_id_, context);
    }
  } else {
    // Assign parameter keys.
    AssignParamKey(kernel_graph);
  }
}

void SessionBasic::GetBatchElements(const AnfNodePtr &kernel_node) const {
  auto shapes = common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
  auto types = common::AnfAlgo::GetNodeAttr<std::vector<TypePtr>>(kernel_node, "types");
  if (shapes.size() != types.size() || shapes.size() == 0 || types.size() == 0) {
    MS_LOG(EXCEPTION) << "Invalid shapes of op[InitDataSetQueue]: shapes size " << shapes.size() << ", types size "
                      << types.size();
  }
  size_t batch_elements = 1;
  const auto &shape = shapes[0];
  for (size_t i = 0; i < shape.size(); ++i) {
    batch_elements *= LongToSize(shape[i]);
  }
  ps::ps_cache_instance.set_batch_elements(batch_elements);
}

void SessionBasic::CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const {
  auto input_nodes = kernel_graph->inputs();
  for (const auto &input_node : input_nodes) {
    if (!input_node->isa<Parameter>()) {
      continue;
    }
    auto pk_node = input_node->cast<ParameterPtr>();
    MS_EXCEPTION_IF_NULL(pk_node);
    auto param_info_ptr = pk_node->param_info();
    const std::string &param_name = pk_node->fullname_with_scope();

    // If the Parameter is initialized on the server and any of its users is a real CNode executed on the device
    // side, an error is reported; such a Parameter may only be used by side-effect operators.
    if (param_info_ptr != nullptr && param_info_ptr->init_in_server() &&
        UseParamInitInServer(kernel_graph, input_node) && !ps::ps_cache_instance.IsHashTable(param_name)) {
      MS_LOG(EXCEPTION) << "Can not initialize the parameter[" << param_name
                        << "] in server, this parameter is used by kernel which executes in device";
    }
  }
}

void SessionBasic::AssignParamKey(const KernelGraphPtr &kernel_graph) const {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  // PS embeddingLookup cache check.
  if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
    MS_LOG(EXCEPTION) << "The other parameter can't set ps mode when the embeddingLookup cache is enabled in "
                         "parameter server training mode.";
  }
  std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph->get_return());
  for (auto &node : node_list) {
    if (node != nullptr && node->isa<CNode>()) {
      // Assign key for forward kernel EmbeddingLookup.
      // The key will be assigned to the embedding table and the Push kernel as well.
      if (common::AnfAlgo::GetCNodeName(node) == kEmbeddingLookupOpName) {
        size_t embedding_table_idx = 0;
        auto embedding_table = common::AnfAlgo::GetInputNode(node->cast<CNodePtr>(), embedding_table_idx);
        size_t key = ps::Worker::GetInstance().SetParamKey(embedding_table->fullname_with_scope());
        common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), node);
      } else if (common::AnfAlgo::GetCNodeName(node) == kPushOpName) {
        auto pull_node = FindPullNode(node, node_list);
        if (!pull_node) {
          MS_LOG(EXCEPTION) << "Assigning parameter key failed: can't find Pull node of the Push node.";
        }

        // Second input of Pull node is the trainable parameter.
        size_t parameter_index = 1;
        auto parameter_node = common::AnfAlgo::GetInputNode(pull_node->cast<CNodePtr>(), parameter_index);
        size_t key = ps::Worker::GetInstance().SetParamKey(parameter_node->fullname_with_scope());
        common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), node);
        common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), pull_node);

        std::string optimizer_name = common::AnfAlgo::GetNodeAttr<std::string>(node, kAttrOptimizerType);
        ps::Worker::GetInstance().SetKeyOptimId(key, optimizer_name);
      }
    }
  }
}

void SessionBasic::InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
                                       const std::vector<tensor::TensorPtr> &inputs_const) const {
  if (!ps::PSContext::instance()->is_worker()) {
    return;
  }
  std::vector<tensor::TensorPtr> inputs(inputs_const);
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto input_nodes = kernel_graph->inputs();
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto tensor = inputs[i];
    MS_EXCEPTION_IF_NULL(tensor);
    auto input_node = input_nodes[i];
    MS_EXCEPTION_IF_NULL(input_node);
    if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
      ps::Worker::GetInstance().InitPSParamAndOptim(input_node, tensor);
    }
  }
}
#endif
}  // namespace session

void DumpGraphExeOrder(const std::string &file_name, const std::string &target_dir,
                       const std::vector<CNodePtr> &execution_order) {
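Note: GetBatchElements derives the per-step element count by multiplying the dimensions of the first shape attribute of InitDataSetQueue. The same computation in isolation, using std::accumulate as an idiomatic alternative to the explicit loop:

#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

// Product of all dimensions of one shape, as in the removed GetBatchElements.
size_t BatchElements(const std::vector<int64_t> &shape) {
  return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1),
                         [](size_t acc, int64_t dim) { return acc * static_cast<size_t>(dim); });
}
// Example: BatchElements({32, 224, 224, 3}) == 32u * 224 * 224 * 3.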
@@ -137,9 +137,6 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
  // get graph id in child graphs by ME front anf node pointer
  virtual GraphId GetGraphIdByNode(const AnfNodePtr &) const;
  virtual GraphId GetFinalRunGraph() const { return kInvalidGraphId; }
  void AssignParamKey(const KernelGraphPtr &kernel_graph) const;
  void InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
                           const std::vector<tensor::TensorPtr> &inputs_const) const;
  bool IsGetNextGraph(const std::shared_ptr<KernelGraph> &kernel_graph, std::string *channel_name) const;
  virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs,
                                std::string *error_msg) const {
@@ -345,11 +342,6 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
  virtual std::string GetCommWorldGroup() { return std::string(); }
  void DumpGraphs(const std::vector<KernelGraphPtr> &graphs) const;
  void GetConstValueDepend(const CNodePtr &cnode, std::vector<size_t> *const_input_attr_index) const;
#ifdef WITH_BACKEND
  void CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const;
  void GetBatchElements(const AnfNodePtr &kernel_node) const;
  void InitPsWorker(const KernelGraphPtr &kernel_graph) const;
#endif
  // TODO(caifubi): refactor and remove bucket.
  std::map<uint32_t, std::vector<std::shared_ptr<device::Bucket>>> bucket_map_;
  std::map<uint32_t, uint32_t> free_bucket_id_map_;
@@ -156,6 +156,9 @@ class BACKEND_EXPORT EmbeddingCacheTableManager {
  // automatic parallel scenario.
  int cache_indices_lower_bound() const;

  // Get vocab cache size on device.
  size_t vocab_cache_size() const { return device_cache_size_; }

  void DumpHashTables() const;

 private:
@@ -27,7 +27,7 @@
#include "frontend/parallel/graph_util/generate_graph.h"
#include "include/common/utils/parallel_context.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "utils/ms_context.h"
#include "ps/ps_context.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
@@ -760,8 +760,6 @@ Status GatherInfo::InferBias() {
  if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
    if (ps::PSContext::instance()->enable_distributed_mindrt()) {
      bias_ = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
-    } else {
-      bias_ = static_cast<int64_t>(ps::PsCacheManager::GetInstance().cache_indices_lower_bound());
    }
    return SUCCESS;
  }
@@ -30,7 +30,8 @@
#include "include/common/utils/parallel_context.h"
#include "frontend/parallel/tensor_layout/tensor_redistribution.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_context.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
#endif
@@ -112,10 +113,8 @@ Status UniqueInfo::ComputeReplaceGraph(const CNodePtr &cnode) {
  int64_t bias = 0;
  if (ps::PSContext::instance()->enable_distributed_mindrt()) {
    bias = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
-  } else {
-    bias = static_cast<int64_t>(ps::PsCacheManager::GetInstance().cache_indices_lower_bound());
  }
-  auto slice_size = SizeToLong(ps::PsCacheManager::GetInstance().vocab_cache_size());
+  auto slice_size = SizeToLong(embedding_cache_table_manager.vocab_cache_size());

  auto sub = gen_g.PushBack({gen_g.NewOpInst(SUB), gen_g.virtual_input_node(), CreateInt32Tensor(bias)});
  auto relu = gen_g.PushBack({gen_g.NewOpInst(RELU), sub});
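Note: the replace graph remaps global embedding-row ids into this shard's local range: Sub subtracts the shard's lower bound and ReLU clamps ids below the shard to zero. The scalar equivalent:

#include <algorithm>
#include <cstdint>

// Scalar equivalent of the Sub -> ReLU pair in the replace graph:
// map a global row id to a local id, clamping ids below this shard to 0.
int64_t ToLocalIndex(int64_t global_id, int64_t bias) {
  return std::max<int64_t>(global_id - bias, 0);
}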
@@ -53,9 +53,7 @@
#include "load_mindir/infer_mindir.h"
#include "debug/data_dump/dump_json_parser.h"
#ifdef WITH_BACKEND
#include "ps/parameter_server.h"
#include "ps/scheduler.h"
#include "ps/worker.h"
#include "fl/worker/fl_worker.h"
#include "fl/server/server.h"
#include "distributed/cluster/cluster_context.h"
@@ -1169,32 +1167,11 @@ bool ExecuteAction(const ResourcePtr &resource) {
}

#ifdef WITH_BACKEND
bool StartPSWorkerAction(const ResourcePtr &) {
  ps::Worker::GetInstance().Run();
  return true;
}
bool StartFLWorkerAction(const ResourcePtr &) {
  fl::worker::FLWorker::GetInstance().Run();
  return true;
}

bool StartPSServerAction(const ResourcePtr &resource) {
  if (distributed::cluster::ClusterContext::instance()->initialized()) {
    MS_LOG(INFO) << "This node is a server. Start waiting for finalization.";
    if (!distributed::cluster::ClusterContext::instance()->Finalize(UINT32_MAX)) {
      MS_LOG(ERROR) << "Failed to finalize server.";
      return false;
    }
    MS_LOG(INFO) << "Server is successfully finalized.";
    return true;
  }
  MS_EXCEPTION_IF_NULL(resource);
  FuncGraphPtr func_graph = resource->func_graph();
  auto &ps = ps::ParameterServer::GetInstance();
  ps.Run(func_graph);
  return true;
}

bool StartServerAction(const ResourcePtr &resource) {
  MS_EXCEPTION_IF_NULL(resource);
  FuncGraphPtr func_graph = resource->func_graph();
@@ -1578,8 +1555,6 @@ std::vector<ActionItem> VmPipeline(const ResourcePtr &resource) {
      std::string server_mode = ps::PSContext::instance()->server_mode();
      if (server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid) {
        (void)actions.emplace_back(std::make_pair("worker", StartFLWorkerAction));
      } else {
        (void)actions.emplace_back(std::make_pair("worker", StartPSWorkerAction));
      }
    }
  }
@@ -1623,18 +1598,6 @@ std::vector<ActionItem> ServerPipeline(const ResourcePtr &resource) {
  return actions;
}

std::vector<ActionItem> PServerPipeline(const ResourcePtr &resource) {
  if (resource->EnableCompileCache() && resource->func_graph() != nullptr) {
    return {std::make_pair("pserver", StartPSServerAction)};
  }
  auto actions = CommonPipeline();
  (void)actions.emplace_back(std::make_pair("optimize", VmOptimizeAction));
  (void)actions.emplace_back(std::make_pair("auto_monad_reorder", OrderEnforceAction));
  (void)actions.emplace_back(std::make_pair("validate", ValidateAction));
  (void)actions.emplace_back(std::make_pair("pserver", StartPSServerAction));
  return actions;
}

std::vector<ActionItem> PSchedulerPipeline(const ResourcePtr &resource) {
  if (resource->EnableCompileCache() && resource->func_graph() != nullptr) {
    return {std::make_pair("scheduler", StartPSSchedulerAction)};
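Note: each pipeline is an ordered vector of (name, callable) ActionItem pairs that a driver runs in sequence; PServerPipeline is the common pipeline plus optimize/auto_monad_reorder/validate and a terminal pserver action. A minimal sketch of that structure, with a hypothetical Resource type standing in for ResourcePtr:

#include <functional>
#include <string>
#include <utility>
#include <vector>

struct Resource {};  // hypothetical stand-in for the real compile resource
using Action = std::function<bool(Resource *)>;
using ActionItem = std::pair<std::string, Action>;

// Run actions in order; stop at the first failure, as a pipeline driver would.
bool RunPipeline(const std::vector<ActionItem> &actions, Resource *res) {
  for (const auto &item : actions) {
    if (!item.second(res)) {
      return false;  // item.first names the failing phase, e.g. "pserver"
    }
  }
  return true;
}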
@@ -40,9 +40,7 @@ bool VmOptimizeAction(const ResourcePtr &resource);
bool PynativeElimOpt(const ResourcePtr &resource);
bool TaskEmitAction(const ResourcePtr &resource);
bool ExecuteAction(const ResourcePtr &resource);
bool StartPSWorkerAction(const ResourcePtr &resource);
bool StartFLWorkerAction(const ResourcePtr &resource);
bool StartPSServerAction(const ResourcePtr &resource);
bool StartPSSchedulerAction(const ResourcePtr &resource);
// This action is for federated learning only. In a later version, parameter server mode and federated learning will
// use the same action.
@@ -69,9 +69,7 @@
#ifdef WITH_BACKEND
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/worker.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "fl/server/server.h"
#include "fl/worker/fl_worker.h"
#include "distributed/cluster/cluster_context.h"
@@ -771,10 +769,6 @@ std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::stri
      ps::PSContext::instance()->is_server()) {
    return ServerPipeline(resource);
  }
  if (ps::PSContext::instance()->is_server()) {
    resource->SetBackendAsync([]() { return compile::CreateBackend(); });
    return PServerPipeline(resource);
  }
  if (ps::PSContext::instance()->is_scheduler()) {
    return PSchedulerPipeline(resource);
  }
@@ -1737,15 +1731,10 @@ void ClearResAtexit() {
#ifdef WITH_BACKEND
  if (!distributed::cluster::ClusterContext::instance()->initialized() && ps::PSContext::instance()->is_ps_mode() &&
      ps::PSContext::instance()->is_worker()) {
    if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
      ps::ps_cache_instance.Finalize();
    }
    MS_LOG(INFO) << "Start finalizing worker.";
    const std::string &server_mode = ps::PSContext::instance()->server_mode();
    if ((server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid)) {
      fl::worker::FLWorker::GetInstance().Finalize();
    } else {
      ps::Worker::GetInstance().Finalize();
    }
  }
#endif
@@ -1,310 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/ascend/hal/device/ps/ascend_ps_cache.h"
#include <google/protobuf/text_format.h>
#include <string>
#include <vector>
#include <memory>
#include "ps/ps_cache/ps_cache_factory.h"
#include "plugin/device/ascend/hal/device/ascend_memory_pool.h"
#include "plugin/device/ascend/kernel/aicpu/aicpu_kernel_mod.h"
#include "utils/ms_context.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "runtime/rt.h"
#include "acl/acl_rt.h"

using mindspore::kernel::Address;
using AddressPtr = std::shared_ptr<Address>;
using AddressPtrList = std::vector<AddressPtr>;

namespace mindspore {
namespace ps {
namespace ascend {
MS_REG_PS_CACHE(kAscendDevice, AscendPsCache);
namespace {
bool SetProtoInputs(const std::vector<std::vector<size_t>> &data_shape, const std::vector<TypeId> &data_type,
                    mindspore::NodeDef *proto) {
  MS_ERROR_IF_NULL(proto);
  if (data_shape.size() != data_type.size()) {
    MS_LOG(ERROR) << "The size of data shape is not equal to the size of data type.";
    return false;
  }
  for (size_t input_index = 0; input_index < data_shape.size(); input_index++) {
    ::mindspore::Tensor *proto_inputs = proto->add_inputs();
    MS_ERROR_IF_NULL(proto_inputs);
    auto input_shape = data_shape[input_index];
    mindspore::TensorShape *tensorShape = proto_inputs->mutable_tensor_shape();
    MS_ERROR_IF_NULL(tensorShape);
    for (auto item : input_shape) {
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_ERROR_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    auto input_type = kernel::AicpuOpUtil::MsTypeToProtoType(data_type[input_index]);
    proto_inputs->set_tensor_type(input_type);
    proto_inputs->set_mem_device("HBM");
  }
  return true;
}

bool SetProtoOutputs(const std::vector<std::vector<size_t>> &data_shape, const std::vector<TypeId> &data_type,
                     mindspore::NodeDef *proto) {
  MS_ERROR_IF_NULL(proto);
  if (data_shape.size() != data_type.size()) {
    MS_LOG(ERROR) << "The size of data shape is not equal to the size of data type.";
    return false;
  }
  for (size_t output_index = 0; output_index < data_shape.size(); output_index++) {
    ::mindspore::Tensor *proto_outputs = proto->add_outputs();
    MS_ERROR_IF_NULL(proto_outputs);
    auto output_shape = data_shape[output_index];
    mindspore::TensorShape *tensorShape = proto_outputs->mutable_tensor_shape();
    MS_ERROR_IF_NULL(tensorShape);
    for (auto item : output_shape) {
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_ERROR_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    auto output_type = kernel::AicpuOpUtil::MsTypeToProtoType(data_type[output_index]);
    proto_outputs->set_tensor_type(output_type);
    proto_outputs->set_mem_device("HBM");
  }
  return true;
}

bool SetNodedefProto(const std::shared_ptr<KernelNodeInfo> &op_info,
                     const std::shared_ptr<kernel::AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_ERROR_IF_NULL(op_info);
  MS_ERROR_IF_NULL(kernel_mod_ptr);
  mindspore::NodeDef proto;
  proto.set_op(op_info->op_name_);
  RETURN_IF_FALSE(SetProtoInputs(op_info->input_data_shape_, op_info->input_data_type_, &proto));
  RETURN_IF_FALSE(SetProtoOutputs(op_info->output_data_shape_, op_info->output_data_type_, &proto));
  std::string nodeDefStr;
  if (!proto.SerializeToString(&nodeDefStr)) {
    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
    return false;
  }
  MS_LOG(DEBUG) << "Set node def proto, node name:" << op_info->op_name_;
  kernel_mod_ptr->SetNodeDef(nodeDefStr);
  return true;
}
}  // namespace

bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
  MS_ERROR_IF_NULL(context);
  auto ret = rtSetDevice(UintToInt(device_id));
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtSetDevice, ret[" << ret << "]";
    return false;
  }
  auto rt_context = const_cast<rtContext_t>(context);
  ret = rtCtxSetCurrent(rt_context);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtCtxSetCurrent, ret[" << ret << "]";
    return false;
  }
  ret = rtStreamCreate(&stream_, 0);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtStreamCreate, ret[" << ret << "]";
    return false;
  }
  return true;
}

void *AscendPsCache::MallocMemory(size_t size) {
  const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
  if (device_addr == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size;
  }
  return device_addr;
}

void AscendPsCache::FreeMemory(void *device_addr) {
  device::ascend::AscendMemoryPool::GetInstance().FreeTensorMem(device_addr);
}

bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
  offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
  if (offset_addr_ == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
  }
  rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
  cache_vocab_size_addr_ =
    reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
  if (cache_vocab_size_addr_ == nullptr) {
    MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
  }
  int copy_value = SizeToInt(cache_vocab_size);
  if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
    return false;
  }
  return SynchronizeStream();
}

bool AscendPsCache::RecordEvent() {
  event_.reset(new rtEvent_t());
  MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
  auto ret = rtEventCreate(&(*event_));
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Create event failed";
    return false;
  }
  ret = rtEventRecord(*event_, stream_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Record event failed";
    return false;
  }
  return true;
}

bool AscendPsCache::SynchronizeEvent() {
  MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
  auto ret = rtEventSynchronize(*event_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "rtEventSynchronize failed";
    return false;
  }
  ret = rtEventDestroy(*event_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "rtEventDestroy failed";
    return false;
  }
  return true;
}

bool AscendPsCache::SynchronizeStream() {
  MS_ERROR_IF_NULL_W_RET_VAL(stream_, false);
  auto ret = rtStreamSynchronize(stream_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "rtStreamSynchronize failed";
    return false;
  }
  return true;
}

bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
  MS_ERROR_IF_NULL(dst);
  MS_ERROR_IF_NULL(src);
  auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE, stream_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
    return false;
  }
  return true;
}

bool AscendPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
  MS_ERROR_IF_NULL(dst);
  MS_ERROR_IF_NULL(src);
  auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_DEVICE_TO_HOST, stream_);
  if (ret != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
    return false;
  }
  return true;
}

bool AscendPsCache::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
                                size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) {
  MS_ERROR_IF_NULL(hash_table_addr);
  MS_ERROR_IF_NULL(swap_out_value_addr);
  MS_ERROR_IF_NULL(swap_out_index_addr);
  auto hash_swap_out_mod = std::make_shared<kernel::AicpuOpKernelMod>();
  MS_ERROR_IF_NULL(hash_swap_out_mod);
  hash_swap_out_mod->SetNodeName(kEmbeddingLookupOpName);

  std::vector<size_t> hash_table_shape = {cache_vocab_size, embedding_size};
  std::vector<size_t> swap_out_index_shape = {swap_out_size};
  std::vector<size_t> offset_shape = {1};
  std::vector<std::vector<size_t>> input_shape = {hash_table_shape, swap_out_index_shape, offset_shape};

  std::vector<size_t> swap_out_value_shape = {swap_out_size, embedding_size};
  std::vector<std::vector<size_t>> output_shape = {swap_out_value_shape};

  std::vector<TypeId> input_type = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32};
  std::vector<TypeId> output_type = {TypeId::kNumberTypeFloat32};
  auto op_info =
    std::make_shared<KernelNodeInfo>(kEmbeddingLookupOpName, input_shape, input_type, output_shape, output_type);
  MS_ERROR_IF_NULL_W_RET_VAL(op_info, false);
  RETURN_IF_FALSE(SetNodedefProto(op_info, hash_swap_out_mod));

  AddressPtrList kernel_inputs;
  AddressPtrList kernel_outputs = {
    std::make_shared<Address>(swap_out_value_addr, swap_out_size * embedding_size * sizeof(float))};
  AddressPtrList kernel_workspaces;
  (void)kernel_inputs.emplace_back(
    std::make_shared<Address>(hash_table_addr, cache_vocab_size * embedding_size * sizeof(float)));
  (void)kernel_inputs.emplace_back(std::make_shared<Address>(swap_out_index_addr, swap_out_size * sizeof(int)));
  (void)kernel_inputs.emplace_back(std::make_shared<Address>(offset_addr_, sizeof(int)));
  auto ret = hash_swap_out_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
  if (!ret) {
    MS_LOG(ERROR) << "Hash swap out launch failed.";
    return false;
  }
  return true;
}

bool AscendPsCache::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
                               size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) {
  MS_ERROR_IF_NULL(hash_table_addr);
  MS_ERROR_IF_NULL(swap_in_value_addr);
  MS_ERROR_IF_NULL(swap_in_index_addr);
  auto hash_swap_in_mod = std::make_shared<kernel::AicpuOpKernelMod>();
  MS_ERROR_IF_NULL(hash_swap_in_mod);
  hash_swap_in_mod->SetNodeName(kernel::kUpdateCache);

  std::vector<size_t> hash_table_shape = {cache_vocab_size, embedding_size};
  std::vector<size_t> swap_in_index_shape = {swap_in_size};
  std::vector<size_t> swap_in_value_shape = {swap_in_size, embedding_size};
  std::vector<size_t> offset_shape = {1};
  std::vector<std::vector<size_t>> input_shape = {hash_table_shape, swap_in_index_shape, swap_in_value_shape,
                                                  offset_shape};
  std::vector<std::vector<size_t>> output_shape = {offset_shape};

  std::vector<TypeId> input_type = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt32, TypeId::kNumberTypeFloat32,
                                    TypeId::kNumberTypeInt32};
  std::vector<TypeId> output_type = {TypeId::kNumberTypeInt32};
  auto op_info =
    std::make_shared<KernelNodeInfo>(kernel::kUpdateCache, input_shape, input_type, output_shape, output_type);
  MS_ERROR_IF_NULL_W_RET_VAL(op_info, false);
  SetNodedefProto(op_info, hash_swap_in_mod);

  AddressPtrList kernel_inputs;
  AddressPtrList kernel_outputs;
  AddressPtrList kernel_workspaces;
  (void)kernel_inputs.emplace_back(
    std::make_shared<Address>(hash_table_addr, cache_vocab_size * embedding_size * sizeof(float)));
  (void)kernel_inputs.emplace_back(std::make_shared<Address>(swap_in_index_addr, swap_in_size * sizeof(int)));
  (void)kernel_inputs.emplace_back(
    std::make_shared<Address>(swap_in_value_addr, swap_in_size * embedding_size * sizeof(float)));
  (void)kernel_inputs.emplace_back(std::make_shared<Address>(cache_vocab_size_addr_, sizeof(int)));
  // The output of the updateCache kernel is required but not useful, so any address can be assigned.
  (void)kernel_outputs.emplace_back(std::make_shared<Address>(offset_addr_, sizeof(int)));
  auto ret = hash_swap_in_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
  if (!ret) {
    MS_LOG(ERROR) << "Hash swap in launch failed.";
    return false;
  }
  return true;
}
}  // namespace ascend
}  // namespace ps
}  // namespace mindspore
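Note: the deleted cache issues asynchronous copies on a device stream (aclrtMemcpyAsync) and publishes them with one stream synchronize, as in MallocConstantMemory's copy-then-SynchronizeStream sequence. A rough host-only sketch of that two-phase contract; AsyncCopyToDevice and SyncStream are stubs standing in for the real ACL/RT calls:

#include <cstddef>
#include <cstring>

using Stream = void *;

// Stubs standing in for aclrtMemcpyAsync / rtStreamSynchronize; the real
// calls enqueue work on a device stream instead of copying immediately.
bool AsyncCopyToDevice(void *dst, const void *src, size_t size, Stream /*stream*/) {
  std::memcpy(dst, src, size);  // host-side stand-in for the async DMA
  return true;
}
bool SyncStream(Stream /*stream*/) { return true; }

// Copies may be batched on the stream; one sync publishes all of them.
bool UploadConstant(int *device_addr, int value, Stream stream) {
  if (!AsyncCopyToDevice(device_addr, &value, sizeof(int), stream)) {
    return false;
  }
  return SyncStream(stream);  // `value` must stay alive until this returns
}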
@@ -1,75 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_

#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "ps/ps_cache/ps_cache_basic.h"
#include "plugin/device/ascend/kernel/aicpu/aicpu_kernel_mod.h"
#include "ir/dtype.h"
#include "runtime/base.h"

namespace mindspore {
namespace ps {
namespace ascend {
struct KernelNodeInfo {
  KernelNodeInfo(const std::string &op_name, std::vector<std::vector<size_t>> input_data_shape,
                 std::vector<TypeId> input_data_type, std::vector<std::vector<size_t>> output_data_shape,
                 std::vector<TypeId> output_data_type)
      : op_name_(op_name) {
    input_data_shape_.swap(input_data_shape);
    input_data_type_.swap(input_data_type);
    output_data_shape_.swap(output_data_shape);
    output_data_type_.swap(output_data_type);
  }
  std::string op_name_;
  std::vector<std::vector<size_t>> input_data_shape_;
  std::vector<TypeId> input_data_type_;
  std::vector<std::vector<size_t>> output_data_shape_;
  std::vector<TypeId> output_data_type_;
};

class AscendPsCache : public PsCacheBasic {
 public:
  AscendPsCache() = default;
  ~AscendPsCache() override = default;
  bool InitDevice(uint32_t device_id, const void *context) override;
  void *MallocMemory(size_t size) override;
  void FreeMemory(void *device_addr) override;
  bool MallocConstantMemory(size_t cache_vocab_size) override;
  bool RecordEvent() override;
  bool SynchronizeEvent() override;
  bool SynchronizeStream() override;
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
  bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
                   size_t embedding_size, size_t swap_out_size) override;
  bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
                  size_t embedding_size, size_t swap_in_size) override;

 private:
  int *offset_addr_{nullptr};
  int *cache_vocab_size_addr_{nullptr};
  std::unique_ptr<rtEvent_t> event_;
};
}  // namespace ascend
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
@@ -72,7 +72,6 @@
#endif
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "plugin/device/ascend/hal/device/ascend_bucket.h"
#include "plugin/device/ascend/hal/device/ascend_device_address.h"
@@ -249,12 +248,6 @@ bool TensorNeedSync(const std::shared_ptr<KernelGraph> &kernel_graph, const AnfN
  }
  MS_EXCEPTION_IF_NULL(memcpy_nums);
  (*memcpy_nums)++;
#ifdef WITH_BACKEND
  const std::string &param_name = parameter->fullname_with_scope();
  if (ps::ps_cache_instance.IsHashTable(param_name)) {
    return false;
  }
#endif
  auto input_param = parameter->cast<ParameterPtr>();
  MS_EXCEPTION_IF_NULL(input_param);
  if (common::AnfAlgo::IsParameterWeight(input_param) || kernel_graph->IsUpdatedParameter(input_param)) {
@@ -347,12 +340,6 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_gra
    }
    if (AnfAlgo::OutputAddrExist(input_node, 0) &&
        TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
#ifdef WITH_BACKEND
      const std::string &param_name = input_node->fullname_with_scope();
      if (ps::ps_cache_instance.IsHashTable(param_name)) {
        continue;
      }
#endif
      auto device_address = AnfAlgo::GetMutableOutputAddr(input_node, 0);
      MS_EXCEPTION_IF_NULL(device_address);
      if (size != 0 &&
@@ -460,9 +447,6 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {

  // adjust kernel
  AdjustKernel(root_graph);
#ifdef WITH_BACKEND
  InitPsWorker(root_graph);
#endif
  // assign stream
  AssignStream(NOT_NULL(root_graph));
#ifndef ENABLE_SECURITY
@@ -539,9 +523,6 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
  single_graph->UpdateExecuteKernelStreamLabel();
  // adjust execution order because merge child graph and other special operations
  AdjustKernel(graph);
#ifdef WITH_BACKEND
  InitPsWorker(graph);
#endif
  // Assign streams for control sink and hccl and so on
  AssignStream(NOT_NULL(graph));
#ifndef ENABLE_SECURITY
@@ -617,14 +598,6 @@ void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_g
    debugger_->PreExecute(kernel_graph);
  }
#endif
#ifdef WITH_BACKEND
  // Initialize parameter server
  InitPSParamAndOptim(kernel_graph, inputs);
  std::string channel_name;
  if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
    ps::ps_cache_instance.IncreaseGraphStep(channel_name);
  }
#endif
}

// Ascend old runtime.
@@ -94,7 +94,6 @@ void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode && ps::PSContext::instance()->is_ps_mode()) {
    AssignParamKey(kernel_graph);
    if (ps::PSContext::instance()->is_worker()) {
      std::string pass_name = "replace_node_by_proxy";
      pass_name.append(std::to_string(graph_sum_));
@@ -197,10 +196,6 @@ void CPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
                                 const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {
  MS_LOG(INFO) << "Bind input output address";
  runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);

#ifdef WITH_BACKEND
  InitPSParamAndOptim(kernel_graph, inputs);
#endif
}

void CPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
@@ -1,34 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/apply_momentum_ps_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
bool ApplyMomentumPSKernelMod::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                       const std::vector<AddressPtr> &outputs) {
  return Launch(inputs, workspace, outputs);
}

const std::vector<size_t> &ApplyMomentumPSKernelMod::input_sizes() const { return GetInputSizeList(); }

const std::vector<size_t> &ApplyMomentumPSKernelMod::output_sizes() const { return GetOutputSizeList(); }

const std::vector<size_t> &ApplyMomentumPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore
@@ -1,45 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_

#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/apply_momentum_cpu_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
class ApplyMomentumPSKernelMod : public ApplyMomentumCpuKernelMod, public PServerKernel {
 public:
  ApplyMomentumPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
      : PServerKernel(rank_id, pserver_num, worker_num) {}
  ~ApplyMomentumPSKernelMod() override = default;

  bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

  const std::vector<size_t> &input_sizes() const override;
  const std::vector<size_t> &output_sizes() const override;
  const std::vector<size_t> &workspace_sizes() const override;
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_
@@ -1,103 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.h"
#include <vector>
#include <algorithm>
#include "ps/worker.h"
#include "ps/util.h"

namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kEmbeddingLookUpProxyInputsNum = 2;
constexpr size_t kEmbeddingLookUpProxyOutputsNum = 1;

void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  EmbeddingLookUpCpuKernelMod::InitKernel(kernel_node);
  auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
  auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1));
  auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
  size_t axis = kShape2dDims - input_shape.size();
  if (input_shape.empty() || input_shape.size() > kShape2dDims) {
    MS_LOG(EXCEPTION) << "Input shape can not be empty or greater than " << kShape2dDims << "-D, but got "
                      << input_shape.size();
  }

  for (auto dim : input_shape) {
    input_dims_ *= dim;
  }
  if (input_dims_ * sizeof(float) > INT_MAX) {
    MS_LOG(EXCEPTION) << "PS mode embedding lookup max embedding table size is " << INT_MAX << ", current shape "
                      << input_shape << " is too large.";
  }

  if (mindspore::ps::PSContext::instance()->is_worker()) {
    key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
  }
  std::vector<float> values;
  (void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values),
                       [](size_t dim) -> float { return SizeToFloat(dim); });
  (void)std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values),
                       [](size_t dim) -> float { return SizeToFloat(dim); });
  (void)std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values),
                       [](size_t dim) -> float { return SizeToFloat(dim); });
  MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
               << ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
  if (mindspore::ps::PSContext::instance()->is_worker()) {
    mindspore::ps::Worker::GetInstance().AddEmbeddingTable(key_, input_shape[axis]);
    mindspore::ps::ParamInitInfoMessage info;
    if (!mindspore::ps::Worker::GetInstance().InitPSEmbeddingTable(key_, input_shape, indices_shape, output_shape,
                                                                   info)) {
      MS_LOG(EXCEPTION) << "InitPSEmbeddingTable failed.";
    }
  }
}

bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> &,
                                        const std::vector<kernel::AddressPtr> &outputs) {
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookUpProxyInputsNum, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookUpProxyOutputsNum, kernel_name_);
  auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  size_t input_size = inputs[1]->size;
  size_t output_size = outputs[0]->size;

  size_t size = input_size / sizeof(int);
  std::vector<int> lookup_ids(size, 0);
  std::vector<float> lookup_result(output_size / sizeof(float), 0);
  auto ret = memcpy_s(lookup_ids.data(), lookup_ids.size() * sizeof(int), indices_addr, input_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
  }
  if (!mindspore::ps::Worker::GetInstance().DoPSEmbeddingLookup(key_, lookup_ids, &lookup_result,
                                                                mindspore::ps::kEmbeddingLookupCmd)) {
    MS_LOG(EXCEPTION) << "DoPSEmbeddingLookup failed.";
  }

  auto ret2 = memcpy_s(output_addr, outputs[0]->size, lookup_result.data(), output_size);
  if (ret2 != EOK) {
    MS_LOG(EXCEPTION) << "Lookup result memcpy failed.";
  }
  return true;
}

MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, EmbeddingLookupProxy, EmbeddingLookUpProxyKernel);
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore
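Note: EmbeddingLookUpProxyKernel::Launch never reads the table locally; it stages the int32 indices into a vector, asks the worker for the remote lookup, and copies the float result back, with bounds-checked copies (memcpy_s) at both ends. A sketch of that staging discipline using std::memcpy plus an explicit size check in place of memcpy_s:

#include <cstddef>
#include <cstring>
#include <vector>

// Bounds-checked copy: refuse instead of overflowing, like memcpy_s with EOK.
bool SafeCopy(void *dst, size_t dst_size, const void *src, size_t copy_size) {
  if (copy_size > dst_size) {
    return false;
  }
  std::memcpy(dst, src, copy_size);
  return true;
}

// Stage raw kernel input bytes into a typed buffer before a remote lookup.
bool StageIndices(const void *indices_addr, size_t input_size, std::vector<int> *ids) {
  ids->assign(input_size / sizeof(int), 0);
  return SafeCopy(ids->data(), ids->size() * sizeof(int), indices_addr, input_size);
}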
@@ -1,51 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_

#include "plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.h"
#include <vector>
#include "plugin/factory/ms_factory.h"

namespace mindspore {
namespace kernel {
namespace ps {
class EmbeddingLookUpProxyKernel : public EmbeddingLookUpCpuKernelMod {
 public:
  EmbeddingLookUpProxyKernel() = default;
  ~EmbeddingLookUpProxyKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

  std::vector<KernelAttr> GetOpSupport() override {
    static const std::vector<KernelAttr> support_list = {
      KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32)};
    return support_list;
  }

 private:
  size_t key_{0};
  size_t input_dims_{1};
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_
@@ -1,114 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h"
#include <vector>
#include <memory>
#include <functional>
#include "kernel/common_utils.h"
#include "ps/util.h"

namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::ps::Util;
constexpr int kAxis = 0;
constexpr size_t kEmbeddingLookUpPSInputSize = 3;

void EmbeddingLookUpPSKernelMod::InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
  if (shape_vec.size() < kEmbeddingLookUpPSInputSize) {
    MS_LOG(EXCEPTION) << "EmbeddingLookUpPSKernelMod needs " << kEmbeddingLookUpPSInputSize << " input shapes, but got "
                      << shape_vec.size();
  }
  for (auto shape : shape_vec) {
    MS_EXCEPTION_IF_NULL(shape);
  }
  auto input_shape = *(shape_vec[0]);
  if (input_shape.empty()) {
    MS_LOG(EXCEPTION) << "Input shape can not be empty";
  }

  first_dim_size_ = LongToSize(input_shape[0]);
  outer_dim_size_ *= SizeOf(input_shape);
  auto indices_shape = *(shape_vec[1]);
  indices_lens_ = SizeOf(indices_shape);
  size_t output_index = 2;
  auto output_shape = *(shape_vec[output_index]);

  int64_t offset = 0;
  for (size_t i = 0; i < rank_id_; i++) {
    offset += Util::LocalShard(input_shape[kAxis], SizeToLong(i), SizeToLong(pserver_num_));
  }
  offset_ = offset;

  // The input shape must be sharded after computing offset_.
  Shard(&input_shape, kAxis);

  input_shape_ = Convert2SizeT(input_shape);

  size_t output_size = sizeof(float) * SizeOf(output_shape);
  (void)output_size_list_.emplace_back(output_size);
}

void EmbeddingLookUpPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
  if (shapes.empty() || shapes[0].empty()) {
    MS_LOG(EXCEPTION) << "Shape can not be empty";
  }
  const auto &indices_shape = shapes[0];
  indices_lens_ = LongToSize(indices_shape[0]);

  size_t output_size = sizeof(float) * indices_lens_;
  for (size_t i = kAxis + 1; i < input_shape_.size(); i++) {
    output_size *= input_shape_[i];
  }
  output_size_list_.clear();
  (void)output_size_list_.emplace_back(output_size);
}

bool EmbeddingLookUpPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  return Launch(inputs, workspace, outputs);
}

void EmbeddingLookUpPSKernelMod::UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids,
                                                  const float *update_vals, size_t ids_size) {
  size_t copy_len = outer_dim_size_ * sizeof(float);
  size_t dest_len = copy_len;
  for (size_t i = 0; i < ids_size; ++i) {
    int index = SizeToInt(lookup_ids[i]) - LongToInt(offset_);
    if (index < 0 || index >= SizeToInt(first_dim_size_)) {
      MS_LOG(EXCEPTION) << "UpdateEmbeddings index invalid.";
    }
    auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len,
                        update_vals + i * outer_dim_size_, copy_len);
    if (ret != EOK) {
      MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
    }
  }
}

const std::vector<size_t> &EmbeddingLookUpPSKernelMod::input_sizes() const { return input_shape_; }

const std::vector<size_t> &EmbeddingLookUpPSKernelMod::output_sizes() const { return GetOutputSizeList(); }

const std::vector<size_t> &EmbeddingLookUpPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }

int64_t EmbeddingLookUpPSKernelMod::offset() const { return offset_; }
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore
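Note: InitKernel derives this server's row offset by summing the shard sizes of all lower ranks via Util::LocalShard. Assuming LocalShard splits `total` rows as evenly as possible with the remainder on the lowest ranks (an assumption about its semantics, not shown in this diff), the computation reduces to:

#include <cstdint>

// Assumed semantics of Util::LocalShard: rows owned by shard `rank` out of
// `n` shards, with the remainder spread over the lowest ranks.
int64_t LocalShardRows(int64_t total, int64_t rank, int64_t n) {
  return total / n + (rank < total % n ? 1 : 0);
}

// Row offset of shard `rank`: sum of all lower shards' sizes, as in the
// removed EmbeddingLookUpPSKernelMod::InitKernel loop.
int64_t ShardOffset(int64_t total, int64_t rank, int64_t n) {
  int64_t offset = 0;
  for (int64_t i = 0; i < rank; ++i) {
    offset += LocalShardRows(total, i, n);
  }
  return offset;
}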
@@ -1,53 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_

#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
class EmbeddingLookUpPSKernelMod : public EmbeddingLookUpCpuKernelMod, public PServerKernel {
 public:
  EmbeddingLookUpPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
      : PServerKernel(rank_id, pserver_num, worker_num) {}
  ~EmbeddingLookUpPSKernelMod() override = default;

  void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
  void ReInit(const std::vector<ShapeVector> &) override;

  bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;
  void UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids, const float *update_vals,
                        size_t ids_size) override;
  const std::vector<size_t> &input_sizes() const override;
  const std::vector<size_t> &output_sizes() const override;
  const std::vector<size_t> &workspace_sizes() const override;
  int64_t offset() const override;

 private:
  std::vector<size_t> input_shape_;
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_
@@ -1,32 +0,0 @@
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
void PServerKernel::Shard(ShapeVector *shape, int axis) const {
  MS_EXCEPTION_IF_NULL(shape);
  if ((*shape).size() <= IntToSize(axis)) {
    MS_LOG(EXCEPTION) << "Shape size is invalid.";
  }
  (*shape)[IntToSize(axis)] =
    Util::LocalShard((*shape)[IntToSize(axis)], SizeToLong(rank_id_), SizeToLong(pserver_num_));
}
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

@@ -1,59 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_

#include <vector>
#include <memory>
#include "kernel/kernel.h"
#include "ps/util.h"

namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::ps::Util;
class PServerKernel {
 public:
  PServerKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
      : rank_id_(rank_id), pserver_num_(pserver_num), worker_num_(worker_num) {}
  ~PServerKernel() = default;
  PServerKernel(const PServerKernel &) = delete;
  PServerKernel &operator=(const PServerKernel &) = delete;
  virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
  virtual void InitKernel(const CNodePtr &, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
  virtual void ReInit(const std::vector<ShapeVector> &) {}
  virtual bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                       const std::vector<AddressPtr> &outputs) = 0;
  virtual void UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids, const float *update_vals,
                                size_t ids_size) {}
  virtual const std::vector<size_t> &input_sizes() const = 0;
  virtual const std::vector<size_t> &output_sizes() const = 0;
  virtual const std::vector<size_t> &workspace_sizes() const = 0;
  virtual int64_t offset() const { return 0; }

 protected:
  virtual void ReInit(const std::vector<AddressPtr> &) {}
  void Shard(ShapeVector *shape, int axis) const;

  size_t rank_id_;
  size_t pserver_num_;
  size_t worker_num_;
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_

@@ -1,29 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/pull_kernel.h"

namespace mindspore {
namespace kernel {
std::vector<KernelAttr> PullKernelMod::GetOpSupport() {
  static const std::vector<KernelAttr> support_list = {
    KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32)};
  return support_list;
}

MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, Pull, PullKernelMod);
}  // namespace kernel
}  // namespace mindspore

@@ -1,92 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_

#include <vector>
#include <string>
#include "ps/worker.h"
#include "ps/util.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"

namespace mindspore {
namespace kernel {
class PullKernelMod : public DeprecatedNativeCpuKernelMod {
 public:
  PullKernelMod() : key_(UINT64_MAX), keys_size_(sizeof(size_t)), var_size_(sizeof(size_t)) {}
  ~PullKernelMod() override = default;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &) override {
    if (inputs.size() != 2) {
      MS_LOG(EXCEPTION) << "Inputs size is " << inputs.size() << ", but PullKernelMod needs 2.";
    }
    bool init_in_server = mindspore::ps::Worker::GetInstance().GetParamInitInServer(param_name_);
    // If init_in_server, forward kernel should run in server too.
    if (!init_in_server) {
      mindspore::ps::Worker::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size);
    }
    return true;
  }

  void Init(const CNodePtr &kernel_node) override {
    MS_EXCEPTION_IF_NULL(kernel_node);
    size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
    if (input_num != 2) {
      MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs.";
      return;
    }

    auto key_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    keys_size_ *= SizeOf(key_shape);

    auto var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
    var_size_ *= SizeOf(var_shape);

    auto param_node = common::AnfAlgo::GetInputNode(kernel_node, 1);
    MS_EXCEPTION_IF_NULL(param_node);
    param_name_ = param_node->fullname_with_scope();

    if (mindspore::ps::PSContext::instance()->is_worker()) {
      key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
    }
    InitSizeLists();
    return;
  }

  std::vector<KernelAttr> GetOpSupport() override;

  void InitKernel(const CNodePtr &) override { return; }

 protected:
  void InitSizeLists() {
    input_size_list_.push_back(keys_size_);
    input_size_list_.push_back(var_size_);
    output_size_list_.push_back(0);
  }

 private:
  size_t key_;
  size_t keys_size_;
  size_t var_size_;
  std::string param_name_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_

@@ -1,53 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/push_kernel.h"
#include <tuple>

namespace mindspore {
namespace kernel {
std::vector<std::tuple<KernelAttr, PushKernelMod::PushFunc, PushKernelMod::PushInitFunc>> PushKernelMod::func_list_ = {
  {KernelAttr()
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeInt32)
     .AddOutputAttr(kNumberTypeUInt64),
   &PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>},
  {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64),
   &PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>},
  {KernelAttr()
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddInputAttr(kNumberTypeFloat32)
     .AddOutputAttr(kNumberTypeUInt64),
   &PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>}};

std::vector<KernelAttr> PushKernelMod::GetOpSupport() {
  std::vector<KernelAttr> support_list;
  (void)std::transform(
    func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
    [](const std::tuple<KernelAttr, PushFunc, PushInitFunc> &tuple_item) { return std::get<0>(tuple_item); });
  return support_list;
}

MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, Push, PushKernelMod);
}  // namespace kernel
}  // namespace mindspore

@@ -1,117 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_

#include <vector>
#include <algorithm>
#include <tuple>
#include "ps/worker.h"
#include "ps/util.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"

namespace mindspore {
namespace kernel {
class PushKernelMod : public DeprecatedNativeCpuKernelMod {
 public:
  PushKernelMod() : key_(UINT64_MAX) {}
  ~PushKernelMod() override = default;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override {
    return kernel_func_(this, inputs, workspace, outputs);
  }

  void Init(const CNodePtr &kernel_node) override {
    auto kernel_attr = GetKernelAttrFromNode(kernel_node);
    auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
    if (!is_match) {
      MS_LOG(EXCEPTION) << "Push does not support this kernel data type: " << kernel_attr;
    }
    kernel_func_ = std::get<1>(func_list_[index]);
    const size_t kTwoIdx = 2;
    init_func_ = std::get<kTwoIdx>(func_list_[index]);

    init_func_(this, kernel_node);
  }

  void InitKernel(const CNodePtr &) override { return; }

  std::vector<KernelAttr> GetOpSupport() override;

 private:
  template <typename T>
  void InitFunc(const CNodePtr &kernel_node) {
    key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
    auto optim_input_shapes =
      common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "optim_input_shapes");
    auto only_shape_indices = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "only_shape_indices");
    MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes;
    MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices;
    for (size_t i = 0; i < optim_input_shapes.size(); i++) {
      auto shape = optim_input_shapes[i];
      mindspore::ps::Worker::GetInstance().SetOptimInputShapes(key_, shape);
      if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) {
        size_t size = sizeof(T);
        for (size_t j = 0; j < shape.size(); j++) {
          size *= LongToSize(shape[j]);
        }
        input_size_list_.push_back(size);
      }
    }

    output_size_list_.push_back(sizeof(size_t));
    return;
  }

  template <typename T>
  bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
                    const std::vector<kernel::AddressPtr> &outputs) {
    if (outputs.size() != 1) {
      MS_LOG(EXCEPTION) << "Outputs size is " << outputs.size() << ", but PushKernelMod needs 1.";
    }
    std::vector<size_t> keys;
    std::vector<uintptr_t> addrs;
    std::vector<int64_t> sizes;
    for (auto input : inputs) {
      keys.push_back(key_);
      addrs.push_back(reinterpret_cast<uintptr_t>(input->addr));
      sizes.push_back(SizeToLong(input->size) / SizeToLong(sizeof(T)));
    }
    mindspore::ps::Worker::GetInstance().Push(keys, addrs, sizes);
    auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t));
    if (ret != EOK) {
      MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
    }
    return true;
  }

  using PushFunc =
    std::function<bool(PushKernelMod *, const std::vector<kernel::AddressPtr> &,
                       const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &)>;
  using PushInitFunc = std::function<void(PushKernelMod *, const CNodePtr &kernel_node)>;
  static std::vector<std::tuple<KernelAttr, PushFunc, PushInitFunc>> func_list_;
  PushFunc kernel_func_;
  PushInitFunc init_func_;

  size_t key_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_

@@ -1,119 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h"
#include <memory>
#include "kernel/common_utils.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ps/util.h"

namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyAdamPSInputsShapeSize = 11;

void SparseApplyAdamPSKernelMod::InitKernel(const CNodePtr &cnode,
                                            const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(shapes);
  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
  if (shape_vec.size() < kSparseApplyAdamPSInputsShapeSize) {
    MS_LOG(EXCEPTION) << "SparseApplyAdamPSKernelMod needs " << kSparseApplyAdamPSInputsShapeSize
                      << " input shapes, but got " << shape_vec.size();
  }
  ShapeVector &var_shape = *(shape_vec[var_index_]);
  ShapeVector &m_shape = *(shape_vec[m_index_]);
  ShapeVector &v_shape = *(shape_vec[v_index_]);
  const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
  const ShapeVector &indices_shape = *(shape_vec[indices_index_]);

  Shard(&var_shape, 0);
  Shard(&m_shape, 0);
  Shard(&v_shape, 0);
  if (var_shape.empty()) {
    MS_LOG(EXCEPTION) << "var must be at least 1D";
  }
  if (!IsSameShape(var_shape, m_shape)) {
    MS_LOG(EXCEPTION) << "var and m must have the same shape";
  }
  if (!IsSameShape(var_shape, v_shape)) {
    MS_LOG(EXCEPTION) << "var and v must have the same shape";
  }
  if (var_shape.size() != grad_shape.size()) {
    MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
  }
  var_first_dim_size_ = LongToSize(var_shape[0]);
  for (size_t i = 1; i < var_shape.size(); ++i) {
    if (var_shape[i] != grad_shape[i]) {
      MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
    }
    var_outer_dim_size_ *= LongToSize(var_shape[i]);
  }
  if (indices_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "indices must be 1D";
  }
  indices_size_ = LongToSize(indices_shape[0]);
  if (grad_shape[0] != SizeToLong(indices_size_)) {
    MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
  }
  if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
    use_nesterov_ = common::AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
  }
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
  (void)workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
}

void SparseApplyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
  if (shapes.empty() || shapes[0].empty()) {
    MS_LOG(EXCEPTION) << "Shape is empty";
  }
  const auto &indices_shape = shapes[0];
  indices_size_ = LongToSize(indices_shape[0]);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
  workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}

void SparseApplyAdamPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
  if (inputs.size() < kSparseApplyAdamPSInputsShapeSize) {
    MS_LOG(EXCEPTION) << "Input numbers can not be less than " << kSparseApplyAdamPSInputsShapeSize << ", but got "
                      << inputs.size();
  }
  const auto &indices_addr = inputs[indices_index_];
  indices_size_ = indices_addr->size / sizeof(int);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
  workspace_size_list_[1] = indices_size_ * sizeof(int);
}

bool SparseApplyAdamPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  ReInit(inputs);
  if (indices_size_ == 0) {
    return true;
  }
  return Launch(inputs, workspace, outputs);
}

const std::vector<size_t> &SparseApplyAdamPSKernelMod::input_sizes() const { return GetInputSizeList(); }

const std::vector<size_t> &SparseApplyAdamPSKernelMod::output_sizes() const { return GetOutputSizeList(); }

const std::vector<size_t> &SparseApplyAdamPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

@@ -1,56 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_

#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyAdamCpuKernelMod;
class SparseApplyAdamPSKernelMod : public SparseApplyAdamCpuKernelMod, public PServerKernel {
 public:
  SparseApplyAdamPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
      : PServerKernel(rank_id, pserver_num, worker_num) {}
  ~SparseApplyAdamPSKernelMod() override = default;

  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
  void ReInit(const std::vector<ShapeVector> &) override;
  bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

  const std::vector<size_t> &input_sizes() const override;
  const std::vector<size_t> &output_sizes() const override;
  const std::vector<size_t> &workspace_sizes() const override;

 protected:
  void ReInit(const std::vector<AddressPtr> &) override;
  size_t var_index_{0};
  size_t m_index_{1};
  size_t v_index_{2};
  size_t grad_index_{9};
  size_t indices_index_{10};
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_

@@ -1,130 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyFtrlPSInputSize = 5;

void SparseApplyFtrlPSKernelMod::InitKernel(const CNodePtr &cnode,
                                            const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(shapes);
  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
  if (shape_vec.size() < kSparseApplyFtrlPSInputSize) {
    MS_LOG(EXCEPTION) << "SparseApplyFtrlPSKernelMod needs " << kSparseApplyFtrlPSInputSize << " input shapes, but got "
                      << shape_vec.size();
  }
  auto var_shape = *(shape_vec[var_index_]);
  auto accum_shape = *(shape_vec[accum_index_]);
  auto linear_shape = *(shape_vec[linear_index_]);
  auto grad_shape = *(shape_vec[grad_index_]);
  auto indices_shape = *(shape_vec[indices_index_]);

  Shard(&var_shape, 0);
  Shard(&accum_shape, 0);
  Shard(&linear_shape, 0);

  if (var_shape.size() != grad_shape.size()) {
    MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
  }
  if (var_shape.empty()) {
    MS_LOG(EXCEPTION) << "var must be at least 1D";
  } else {
    var_first_dim_size_ = LongToSize(var_shape[0]);
  }

  for (size_t i = 1; i < var_shape.size(); ++i) {
    if (var_shape[i] != grad_shape[i]) {
      MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
    }
    var_outer_dim_size_ *= LongToSize(var_shape[i]);
  }
  if (indices_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "indices must be a 1D vector";
  }
  indices_size_ = LongToSize(indices_shape[0]);
  if (grad_shape[0] != SizeToLong(indices_size_)) {
    MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
  }
  init_accum_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
  if (init_accum_ < 0) {
    MS_LOG(EXCEPTION) << "init_accum must be a non-negative scalar";
  }
  lr_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "lr");
  if (lr_ <= 0) {
    MS_LOG(EXCEPTION) << "lr must be a positive scalar";
  }
  l1_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "l1");
  if (l1_ < 0) {
    MS_LOG(EXCEPTION) << "l1 must be a non-negative scalar";
  }
  l2_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "l2");
  if (l2_ < 0) {
    MS_LOG(EXCEPTION) << "l2 must be a non-negative scalar";
  }
  lr_power_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "lr_power");
  if (lr_power_ > 0) {
    MS_LOG(EXCEPTION) << "lr_power must be a non-positive scalar";
  }
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
}

void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
  if (shapes.empty() || shapes[0].empty()) {
    MS_LOG(EXCEPTION) << "Shape can not be empty";
  }
  const auto &indices_shape = shapes[0];
  indices_size_ = LongToSize(indices_shape[0]);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
  workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}

void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
  if (inputs.size() < kSparseApplyFtrlPSInputSize) {
    MS_LOG(EXCEPTION) << "Input numbers can not be less than " << kSparseApplyFtrlPSInputSize << ", but got "
                      << inputs.size();
  }
  const auto &indices_addr = inputs[indices_index_];
  indices_size_ = indices_addr->size / sizeof(int);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
  workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}

bool SparseApplyFtrlPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  ReInit(inputs);
  if (indices_size_ == 0) {
    return true;
  }
  return Launch(inputs, workspace, outputs);
}

const std::vector<size_t> &SparseApplyFtrlPSKernelMod::input_sizes() const { return GetInputSizeList(); }

const std::vector<size_t> &SparseApplyFtrlPSKernelMod::output_sizes() const { return GetOutputSizeList(); }

const std::vector<size_t> &SparseApplyFtrlPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

@@ -1,59 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_

#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyFtrlCpuKernelMod;
class SparseApplyFtrlPSKernelMod : public SparseApplyFtrlCpuKernelMod, public PServerKernel {
 public:
  SparseApplyFtrlPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
      : PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
  ~SparseApplyFtrlPSKernelMod() override = default;

  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
  void ReInit(const std::vector<ShapeVector> &) override;

  bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

  const std::vector<size_t> &input_sizes() const override;
  const std::vector<size_t> &output_sizes() const override;
  const std::vector<size_t> &workspace_sizes() const override;
  const float init_accum() const { return init_accum_; }

 protected:
  void ReInit(const std::vector<AddressPtr> &) override;
  float init_accum_{0.1};
  size_t var_index_{0};
  size_t accum_index_{1};
  size_t linear_index_{2};
  size_t grad_index_{3};
  size_t indices_index_{4};
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_

@@ -1,120 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h"
#include <memory>
#include "kernel/common_utils.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ps/util.h"

namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyLazyAdamPSInputsSize = 11;

void SparseApplyLazyAdamPSKernelMod::InitKernel(
  const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(shapes);
  const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
  if (shape_vec.size() < kSparseApplyLazyAdamPSInputsSize) {
    MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernelMod needs " << kSparseApplyLazyAdamPSInputsSize
                      << " input shapes, but got " << shape_vec.size();
  }
  ShapeVector &var_shape = *(shape_vec[var_index_]);
  ShapeVector &m_shape = *(shape_vec[m_index_]);
  ShapeVector &v_shape = *(shape_vec[v_index_]);
  const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
  const ShapeVector &indices_shape = *(shape_vec[indices_index_]);

  Shard(&var_shape, 0);
  Shard(&m_shape, 0);
  Shard(&v_shape, 0);

  if (var_shape.empty()) {
    MS_LOG(EXCEPTION) << "var must be at least 1D";
  }
  if (var_shape.size() != grad_shape.size()) {
    MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
  }
  if (!IsSameShape(var_shape, m_shape)) {
    MS_LOG(EXCEPTION) << "var and m must have the same shape";
  }
  if (!IsSameShape(var_shape, v_shape)) {
    MS_LOG(EXCEPTION) << "var and v must have the same shape";
  }
  var_first_dim_size_ = LongToSize(var_shape[0]);
  for (size_t i = 1; i < var_shape.size(); ++i) {
    if (var_shape[i] != grad_shape[i]) {
      MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
    }
    var_outer_dim_size_ *= LongToSize(var_shape[i]);
  }
  if (indices_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "indices must be 1D";
  }
  indices_size_ = LongToSize(indices_shape[0]);
  if (grad_shape[0] != SizeToLong(indices_size_)) {
    MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
  }
  if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
    use_nesterov_ = common::AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
  }
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
}

void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
  if (shapes.empty() || shapes[0].empty()) {
    MS_LOG(EXCEPTION) << "Shape can not be empty";
  }
  const auto &indices_shape = shapes[0];
  indices_size_ = LongToSize(indices_shape[0]);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
  workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}

void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
  if (inputs.size() < kSparseApplyLazyAdamPSInputsSize) {
    MS_LOG(EXCEPTION) << "Input shape size can not be less than " << kSparseApplyLazyAdamPSInputsSize << ", but got "
                      << inputs.size();
  }
  const auto &indices_addr = inputs[indices_index_];
  indices_size_ = indices_addr->size / sizeof(int);
  workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
  workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}

bool SparseApplyLazyAdamPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
                                             const std::vector<AddressPtr> &workspace,
                                             const std::vector<AddressPtr> &outputs) {
  ReInit(inputs);
  if (indices_size_ == 0) {
    return true;
  }
  return Launch(inputs, workspace, outputs);
}

const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::input_sizes() const { return GetInputSizeList(); }

const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::output_sizes() const { return GetOutputSizeList(); }

const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

@@ -1,55 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_

#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.h"

namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyLazyAdamCpuKernelMod;
class SparseApplyLazyAdamPSKernelMod : public SparseApplyLazyAdamCpuKernelMod, public PServerKernel {
 public:
  SparseApplyLazyAdamPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
      : PServerKernel(rank_id, pserver_num, worker_num) {}
  ~SparseApplyLazyAdamPSKernelMod() override = default;

  void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
  void ReInit(const std::vector<ShapeVector> &) override;
  bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

  const std::vector<size_t> &input_sizes() const override;
  const std::vector<size_t> &output_sizes() const override;
  const std::vector<size_t> &workspace_sizes() const override;

 protected:
  void ReInit(const std::vector<AddressPtr> &) override;
  size_t var_index_{0};
  size_t m_index_{1};
  size_t v_index_{2};
  size_t grad_index_{9};
  size_t indices_index_{10};
};
}  // namespace ps
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_

@@ -18,7 +18,6 @@
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "utils/ms_context.h"
#include "include/common/utils/convert_utils.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "plugin/device/gpu/hal/device/gpu_common.h"
namespace mindspore {

@@ -73,9 +72,6 @@ bool GPUMemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList
void GPUMemoryManager::Initialize() {
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  if (ps::ps_cache_instance.initialized_ps_cache()) {
    return;
  }
  auto device_addr = MallocMemFromMemPool(1, false);
  if (!device_addr) {
    MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";

@@ -1,112 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/hal/device/ps/gpu_ps_cache.h"
#include "ps/ps_cache/ps_cache_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/hash_impl.cuh"
#include "plugin/device/gpu/hal/device/gpu_common.h"
#include "plugin/device/gpu/hal/device/cuda_driver.h"
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace ps {
namespace gpu {
MS_REG_PS_CACHE(kGPUDevice, GPUPsCache);
bool GPUPsCache::InitDevice(uint32_t device_id, const void *) {
  bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id));
  if (!ret) {
    MS_LOG(ERROR) << "Failed to set device id:" << device_id;
    return false;
  }
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaStreamCreate(reinterpret_cast<CUstream_st **>(&stream_)),
                                           "Cuda create stream failed");
  return true;
}

void *GPUPsCache::MallocMemory(size_t size) {
  return device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(size);
}

void GPUPsCache::FreeMemory(void *device_addr) {
  device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(device_addr);
}

bool GPUPsCache::RecordEvent() {
  event_.reset(new cudaEvent_t());
  MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventCreate(&(*event_)), "Cuda create event failed");
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventRecord(*event_, reinterpret_cast<cudaStream_t>(stream_)),
                                           "Cuda record event failed");
  return true;
}

bool GPUPsCache::SynchronizeEvent() {
  MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventSynchronize(*event_), "Cuda sync event failed");
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventDestroy(*event_), "Cuda destroy event failed");
  return true;
}

bool GPUPsCache::SynchronizeStream() {
  MS_ERROR_IF_NULL_W_RET_VAL(stream_, false);
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_)),
                                           "Cuda sync stream failed");
  return true;
}

bool GPUPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
  MS_ERROR_IF_NULL(dst);
  MS_ERROR_IF_NULL(src);
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(
    cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_)),
    "Cuda memcpy failed");
  return true;
}

bool GPUPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
  MS_ERROR_IF_NULL(dst);
  MS_ERROR_IF_NULL(src);
  CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(
    cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_)),
    "Cuda memcpy failed");
  return true;
}

bool GPUPsCache::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t,
                             size_t embedding_size, size_t swap_out_size) {
  MS_ERROR_IF_NULL(hash_table_addr);
  MS_ERROR_IF_NULL(swap_out_value_addr);
  MS_ERROR_IF_NULL(swap_out_index_addr);
  DoHashSwapOut(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_out_value_addr),
                reinterpret_cast<int *>(swap_out_index_addr), swap_out_size, embedding_size,
                reinterpret_cast<cudaStream_t>(stream_));
  return true;
}

bool GPUPsCache::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t,
                            size_t embedding_size, size_t swap_in_size) {
  MS_ERROR_IF_NULL(hash_table_addr);
  MS_ERROR_IF_NULL(swap_in_value_addr);
  MS_ERROR_IF_NULL(swap_in_index_addr);
  DoHashSwapIn(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_in_value_addr),
               reinterpret_cast<int *>(swap_in_index_addr), swap_in_size, embedding_size,
               reinterpret_cast<cudaStream_t>(stream_));
  return true;
}
}  // namespace gpu
}  // namespace ps
}  // namespace mindspore

@@ -1,50 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_

#include <cuda_runtime_api.h>
#include <memory>
#include "ps/ps_cache/ps_cache_basic.h"

namespace mindspore {
namespace ps {
namespace gpu {
class GPUPsCache : public PsCacheBasic {
 public:
  GPUPsCache() = default;
  ~GPUPsCache() override = default;
  bool InitDevice(uint32_t device_id, const void *context) override;
  void *MallocMemory(size_t size) override;
  void FreeMemory(void *device_addr) override;
  bool RecordEvent() override;
  bool SynchronizeEvent() override;
  bool SynchronizeStream() override;
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
  bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
                   size_t embedding_size, size_t swap_out_size) override;
  bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
                  size_t embedding_size, size_t swap_in_size) override;

 private:
  std::unique_ptr<cudaEvent_t> event_;
};
}  // namespace gpu
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_

@@ -88,7 +88,7 @@
#include "kernel/graph_kernel_info.h"
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_context.h"
#endif

namespace mindspore {

@@ -362,12 +362,6 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
    auto input_node = input_nodes[i];
    MS_EXCEPTION_IF_NULL(input_node);
    if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
#ifdef WITH_BACKEND
      const std::string &param_name = input_node->fullname_with_scope();
      if (ps::ps_cache_instance.IsHashTable(param_name)) {
        continue;
      }
#endif
      auto pk_node = input_node->cast<ParameterPtr>();
      auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
      MS_EXCEPTION_IF_NULL(device_address);

@@ -443,9 +437,6 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
  GraphKernelOptimize(graph);
  // Start gpu kernel runtime
  StartKernelRT();
#ifdef WITH_BACKEND
  InitPsWorker(graph);
#endif
  // Assign CUDA streams
  AssignStream(graph);
#ifdef ENABLE_DUMP_IR

@@ -525,11 +516,6 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap

  E2eDump::UpdateIterOldRTDump(kernel_graph.get());
#endif

#ifdef WITH_BACKEND
  // Initialize parameter server
  InitPSParamAndOptim(kernel_graph, inputs);
#endif
}

// GPU old runtime.

@@ -563,12 +549,6 @@ void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph)
  int kernel_num = kernel_graph->execution_order().size();
  int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
  for (int64_t i = 0; i < loopsize; i++) {
#ifdef WITH_BACKEND
    std::string channel_name;
    if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
      ps::ps_cache_instance.IncreaseGraphStep(channel_name);
    }
#endif
    Execute(kernel_graph);
  }
}

@@ -14,7 +14,6 @@ if(NOT ENABLE_CPU OR WIN32)
    list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/tcp_server.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "core/node.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "core/node_manager.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_cache_manager.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "core/worker_node.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "core/ps_worker_node.cc")
    list(REMOVE_ITEM _PS_SRC_FILES "core/server_node.cc")

@@ -44,13 +43,6 @@ if(NOT ENABLE_CPU OR WIN32)
    list(REMOVE_ITEM _PS_SRC_FILES "core/instance_manager.cc")
endif()

if(NOT ENABLE_D)
    list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ascend/ascend_ps_cache.cc")
endif()

if(NOT ENABLE_GPU)
    list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/gpu/gpu_ps_cache.cc")
endif()

list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_data/ps_data_prefetch.cc")
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_data/ps_data_channel.cc")

@@ -1,27 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/embedding_table_shard_metadata.h"

namespace mindspore {
namespace ps {
uint64_t EmbeddingTableShardMetadata::begin() const { return begin_; }

uint64_t EmbeddingTableShardMetadata::end() const { return end_; }

uint64_t EmbeddingTableShardMetadata::size() const { return end_ - begin_; }
}  // namespace ps
}  // namespace mindspore

@@ -1,40 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_
#define MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_

#include <iostream>
#include "utils/log_adapter.h"

namespace mindspore {
namespace ps {
class EmbeddingTableShardMetadata {
 public:
  explicit EmbeddingTableShardMetadata(uint64_t begin, uint64_t end) : begin_(begin), end_(end) {}
  virtual ~EmbeddingTableShardMetadata() = default;

  uint64_t begin() const;
  uint64_t end() const;
  uint64_t size() const;

 private:
  uint64_t begin_;
  uint64_t end_;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_

@@ -1,414 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/optimizer_info.h"
#include <map>
#include <memory>
#include <string>
#include <functional>
#include "ps/util.h"

namespace mindspore {
namespace ps {
void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) {
  MS_EXCEPTION_IF_NULL(workspace);
  workspaces_.push_back(workspace);
}

const std::vector<AddressPtr> &OptimizerInfo::inputs() const { return inputs_; }

const std::vector<AddressPtr> &OptimizerInfo::workspaces() const { return workspaces_; }

const std::vector<AddressPtr> &OptimizerInfo::outputs() const { return outputs_; }

bool OptimizerInfo::IsSparse() const { return false; }

const size_t OptimizerInfo::indice_size() const { return 0; }

size_t OptimizerInfo::grad_index() { return 0; }

size_t OptimizerInfo::indices_index() { return 0; }

template <typename T>
void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                                          const Lengths &lens) {
  MS_EXCEPTION_IF_NULL(data);
  if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " is not supported.";
  }
  const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
  const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
  if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
  }

  size_t origin_index = origin_input_map.at(input_name);
  size_t ps_send_index = ps_send_index_map.at(input_name);
  if (ps_send_index >= lens.size() || origin_index >= inputs_.size()) {
    MS_LOG(EXCEPTION) << "Index is out of bound for optimizer " << optim_type << ", origin_index:" << origin_index
                      << ", ps_send_index:" << ps_send_index;
  }
  EXC_IF_VEC_IDX_OOB(lens, ps_send_index);
  size_t size = IntToSize(lens[ps_send_index]) * sizeof(T);
  int offset = std::accumulate(lens.begin(), lens.begin() + SizeToInt(ps_send_index), 0, std::plus<int>());
  AddressPtr optim_input = inputs_[origin_index];
  MS_EXCEPTION_IF_NULL(optim_input);

  void *dst_data = optim_input->addr;
  T *src_data = reinterpret_cast<T *>(data) + offset;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  errno_t ret = memcpy_s(optim_input->addr, optim_input->size, src_data, size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  return;
}

void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
  size_t size = gradient()->size / sizeof(float);
  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += IntToSize(lengths[i]);
  }
  float *grad_data = const_cast<float *>(values.data()) + grad_offset;
  MS_EXCEPTION_IF_NULL(grad_data);
#define google mindspore_private
  CHECK_EQ(size, IntToSize(lengths[grad_index]));
#undef google
  for (size_t i = 0; i < size; i++) {
    accum_grad_data[i] += grad_data[i];
  }
}

void DenseOptimInfo::ComputeMean(const std::vector<ShapeVector> &, size_t n, size_t, size_t) {
  if (n > 1) {
    MS_EXCEPTION_IF_NULL(gradient()->addr);
    float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
    size_t size = gradient()->size / sizeof(float);
    for (size_t i = 0; i < size; i++) {
      accum_grad_data[i] /= n;
    }
  }
}

void DenseOptimInfo::Reset() {
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  errno_t ret = memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    return;
  }
}

void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
  // Append grad data to the end
  MS_EXCEPTION_IF_NULL(gradient()->addr);
  float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);

  size_t grad_index = this->grad_index();
  size_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += IntToSize(lengths[i]);
  }
  float *incr_grad_data = const_cast<float *>(values.data()) + grad_offset;
  MS_EXCEPTION_IF_NULL(incr_grad_data);

  size_t incr_grad_size = IntToSize(lengths[grad_index]) * sizeof(float);
  size_t dst_size = incr_grad_size;
  size_t src_size = incr_grad_size;
  void *dst_data = accum_grad_data + grads_offset_;
  void *src_data = incr_grad_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }
  grads_offset_ += IntToSize(lengths[grad_index]);
  gradient()->size += incr_grad_size;

  // Append indice data to the end
  MS_EXCEPTION_IF_NULL(indices()->addr);
  int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
  MS_EXCEPTION_IF_NULL(accum_indices_data);

  size_t indices_index = this->indices_index();
  size_t indice_offset = 0;
  for (size_t i = 0; i < indices_index; i++) {
    indice_offset += IntToSize(lengths[i]);
  }

  void *incr_indice_data_temp = const_cast<float *>(values.data()) + indice_offset;
  MS_EXCEPTION_IF_NULL(incr_indice_data_temp);
  int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
  MS_EXCEPTION_IF_NULL(incr_indice_data);

  size_t incr_indice_size = lengths[indices_index];
  size_t incr_indice_data_size = incr_indice_size * sizeof(int);
  dst_size = incr_indice_data_size;
  src_size = incr_indice_data_size;
  dst_data = accum_indices_data + indices_offset_;
  src_data = incr_indice_data;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  errno_t ret2 = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret2 != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
    return;
  }
  indices_offset_ += IntToSize(lengths[indices_index]);
  indices()->size += incr_indice_data_size;
}

void SparseOptimInfo::ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {
  if (n == 0 || indices()->size == 0) {
    MS_LOG(EXCEPTION) << "The size of shapes or indices is 0.";
  }
  size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
  size_t segment_size = gradient()->size / indices()->size;

  std::vector<float> new_grad(indices_size * segment_size);
  std::vector<int> new_indices(indices_size);
  mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});

  if (shapes.size() < 2 || shapes[1].empty()) {
    MS_LOG(EXCEPTION) << "No input shape found";
  }
  auto input_shapes = shapes[1];
  if (input_shapes.size() == 0) {
    MS_LOG(EXCEPTION) << "Invalid input shapes";
  }
  size_t first_dim_size = input_shapes.front();
  size_t outer_dim_size = segment_size;

  if (first_dim_size == 0 || outer_dim_size == 0) {
    MS_LOG(ERROR) << "Invalid first dim size";
  }

  MS_EXCEPTION_IF_NULL(gradient()->addr);
  MS_EXCEPTION_IF_NULL(indices()->addr);
  float *grad_data = reinterpret_cast<float *>(gradient()->addr);
  int *indices_data = reinterpret_cast<int *>(indices()->addr);

  if (sharded_) {
    auto original_row_count = input_shapes.front();
    if (original_row_count > 0) {
      size_t offset = 0;
      std::map<int64_t, int64_t> rank_dims =
        Util::AllRankLocalShard(original_row_count, SizeToLong(rank_id), SizeToLong(server_num));
      for (size_t i = 0; i < rank_id; i++) {
        if (rank_dims.count(i) == 0) {
          MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
        }
        offset += LongToSize(rank_dims[i]);
      }
      for (size_t j = 0; j < indices_size; j++) {
        indices_data[j] -= SizeToInt(offset);
      }
    }
  }

  Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
                             &unique_sparse_grad);

  size_t reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.value_);
  errno_t ret = memcpy_s(gradient()->addr, gradient()->size, unique_sparse_grad.value_, reduced_grad_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }

  size_t reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
  MS_EXCEPTION_IF_NULL(unique_sparse_grad.indices_);
  ret = memcpy_s(indices()->addr, indices()->size, unique_sparse_grad.indices_, reduced_indice_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    return;
  }

  gradient()->size = reduced_grad_size;
  indices()->size = reduced_indice_size;

  for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
    grad_data[i] = grad_data[i] / n;
  }
}

void SparseOptimInfo::Reset() {
  gradient()->size = 0;
  indices()->size = 0;
  grads_offset_ = 0;
  indices_offset_ = 0;
}

MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
                                     const AddressPtr &learning_rate, const AddressPtr &gradient,
                                     const AddressPtr &momentum) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(accumulate);
  MS_EXCEPTION_IF_NULL(learning_rate);
  MS_EXCEPTION_IF_NULL(gradient);
  MS_EXCEPTION_IF_NULL(momentum);
  inputs_.push_back(weight);
  inputs_.push_back(accumulate);
  inputs_.push_back(learning_rate);
  inputs_.push_back(gradient);
  inputs_.push_back(momentum);
}

void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
}

const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }

const AddressPtr &MomentumOptimInfo::gradient() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &MomentumOptimInfo::indices() {
  size_t origin_grad_index = kMomentumOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

size_t MomentumOptimInfo::grad_index() {
  size_t ps_grad_index = kMomentumPSSendIdx.at("grad");
  return ps_grad_index;
}

SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
                                         const AddressPtr &beta1_power, const AddressPtr &beta2_power,
                                         const AddressPtr &learning_rate, const AddressPtr &beta1,
                                         const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
                                         const AddressPtr &indices, bool sharded) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(m);
  MS_EXCEPTION_IF_NULL(v);
  MS_EXCEPTION_IF_NULL(beta1_power);
  MS_EXCEPTION_IF_NULL(beta2_power);
  MS_EXCEPTION_IF_NULL(learning_rate);
  MS_EXCEPTION_IF_NULL(beta1);
  MS_EXCEPTION_IF_NULL(beta2);
  MS_EXCEPTION_IF_NULL(epsilon);
  MS_EXCEPTION_IF_NULL(grad);
  MS_EXCEPTION_IF_NULL(indices);
  inputs_.push_back(weight);
  inputs_.push_back(m);
  inputs_.push_back(v);
  inputs_.push_back(beta1_power);
  inputs_.push_back(beta2_power);
  inputs_.push_back(learning_rate);
  inputs_.push_back(beta1);
  inputs_.push_back(beta2);
  inputs_.push_back(epsilon);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
  sharded_ = sharded;
}

void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
  UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
  UpdateOptimInputValue<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
}

const AddressPtr &SparseAdamOptimInfo::gradient() {
  size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseAdamOptimInfo::indices() {
  size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
  return inputs_[origin_indices_index];
}

bool SparseAdamOptimInfo::IsSparse() const { return true; }

size_t SparseAdamOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseAdamPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseAdamOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseAdamPSSendIdx.at("indices");
  return ps_indices_index;
}

SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
                                         const AddressPtr &grad, const AddressPtr &indices, bool sharded) {
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(accum);
  MS_EXCEPTION_IF_NULL(linear);
  MS_EXCEPTION_IF_NULL(grad);
  MS_EXCEPTION_IF_NULL(indices);
  inputs_.push_back(weight);
  inputs_.push_back(accum);
  inputs_.push_back(linear);
  inputs_.push_back(grad);
  inputs_.push_back(indices);
  grads_offset_ = grad->size / sizeof(float);
  indices_offset_ = indices->size / sizeof(int);
  sharded_ = sharded;
}

const AddressPtr &SparseFtrlOptimInfo::gradient() {
  size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
  return inputs_[origin_grad_index];
}

const AddressPtr &SparseFtrlOptimInfo::indices() {
  size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
  EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
  return inputs_[origin_indices_index];
}

bool SparseFtrlOptimInfo::IsSparse() const { return true; }

size_t SparseFtrlOptimInfo::grad_index() {
  size_t ps_grad_index = kSparseFtrlPSSendIdx.at("grad");
  return ps_grad_index;
}

size_t SparseFtrlOptimInfo::indices_index() {
  size_t ps_indices_index = kSparseFtrlPSSendIdx.at("indices");
  return ps_indices_index;
}
}  // namespace ps
}  // namespace mindspore
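For context on the removed sparse path above: SparseOptimInfo::Accumulate appends each worker's gradient rows and row indices to flat buffers, and ComputeMean then merges rows that share an index and divides by the number of contributions (the real code delegates the merge to the also-removed Util::ReduceSparseGradient). A minimal standalone sketch of that merge-and-average step, with hypothetical names and plain std containers in place of the deleted helpers:

#include <cstddef>
#include <map>
#include <vector>

// Merge duplicate sparse rows and average over n contributions.
// grads is row-major: indices.size() rows of segment_size floats each.
void ReduceAndMean(std::vector<int> *indices, std::vector<float> *grads, size_t segment_size, size_t n) {
  std::map<int, std::vector<float>> merged;
  for (size_t row = 0; row < indices->size(); ++row) {
    auto &acc = merged[(*indices)[row]];
    acc.resize(segment_size, 0.0f);  // zero-initialized on first touch
    for (size_t col = 0; col < segment_size; ++col) {
      acc[col] += (*grads)[row * segment_size + col];
    }
  }
  indices->clear();
  grads->clear();
  for (auto &kv : merged) {
    indices->push_back(kv.first);
    for (float v : kv.second) {
      grads->push_back(v / static_cast<float>(n));  // the averaging step of ComputeMean
    }
  }
}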
@@ -1,127 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
#define MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_

#include <vector>
#include <string>
#include "kernel/kernel.h"
#include "ps/constants.h"

namespace mindspore {
namespace ps {
using mindspore::kernel::AddressPtr;
class OptimizerInfo {
 public:
  OptimizerInfo() = default;
  virtual ~OptimizerInfo() = default;

  virtual void Update(const Values &values, const Lengths &lengths) {}
  virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
  virtual void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {}
  virtual void Reset() {}
  void AddWorkspace(const AddressPtr &workspace);

  virtual const AddressPtr &gradient() = 0;
  virtual const AddressPtr &indices() = 0;
  virtual const size_t indice_size() const;
  const std::vector<AddressPtr> &inputs() const;
  const std::vector<AddressPtr> &workspaces() const;
  const std::vector<AddressPtr> &outputs() const;

  virtual bool IsSparse() const;
  virtual size_t grad_index();
  virtual size_t indices_index();

 protected:
  template <typename T>
  void UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                             const Lengths &lens);
  std::vector<AddressPtr> inputs_;
  std::vector<AddressPtr> workspaces_;
  std::vector<AddressPtr> outputs_;
};

class DenseOptimInfo : public OptimizerInfo {
 public:
  DenseOptimInfo() = default;
  ~DenseOptimInfo() override = default;

  void Accumulate(const Values &values, const Lengths &lens) override;
  void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
  void Reset() override;
};

class SparseOptimInfo : public OptimizerInfo {
 public:
  SparseOptimInfo() = default;
  ~SparseOptimInfo() override = default;

  void Accumulate(const Values &values, const Lengths &lens) override;
  void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
  void Reset() override;
  const size_t indice_size() const override;

 protected:
  size_t grads_offset_{0};
  size_t indices_offset_{0};
  bool sharded_{true};
};

class MomentumOptimInfo : public DenseOptimInfo {
 public:
  MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate,
                    const AddressPtr &gradient, const AddressPtr &momentum);
  ~MomentumOptimInfo() override = default;

  void Update(const Values &values, const Lengths &lens) override;
  const AddressPtr &gradient();
  const AddressPtr &indices();
  size_t grad_index() override;
};

class SparseAdamOptimInfo : public SparseOptimInfo {
 public:
  SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power,
                      const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1,
                      const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
                      const AddressPtr &indices, bool sharded);
  ~SparseAdamOptimInfo() override = default;

  void Update(const Values &values, const Lengths &lens) override;
  const AddressPtr &gradient();
  const AddressPtr &indices();
  bool IsSparse() const override;
  size_t grad_index() override;
  size_t indices_index() override;
};

class SparseFtrlOptimInfo : public SparseOptimInfo {
 public:
  SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
                      const AddressPtr &grad, const AddressPtr &indices, bool sharded);
  ~SparseFtrlOptimInfo() override = default;

  const AddressPtr &gradient();
  const AddressPtr &indices();
  bool IsSparse() const override;
  size_t grad_index() override;
  size_t indices_index() override;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
@@ -1,248 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/optimizer_info_builder.h"
#include <vector>
#include <memory>
#include <functional>
#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"

namespace mindspore {
namespace ps {
using mindspore::kernel::ps::SparseApplyFtrlPSKernelMod;
OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
                                           const WeightPtr &weight, const Keys &keys, const Values &values,
                                           const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
                                           bool sharded) {
  MS_EXCEPTION_IF_NULL(pserver_kernel);
  MS_EXCEPTION_IF_NULL(weight);
  MS_EXCEPTION_IF_NULL(inputs_shape);
  OptimizerInfo *optim_info =
    BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel, sharded);
  MS_EXCEPTION_IF_NULL(optim_info);
  std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
  BuildWorkspaces(optim_info, ws_sizes, worker_num);
  BuildOutputs(optim_info, worker_num);
  return optim_info;
}

void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t) {
  MS_EXCEPTION_IF_NULL(info);
  for (size_t i = 0; i < ws_sizes.size(); i++) {
    size_t size = ws_sizes[i];
    AddressPtr workspace = std::make_shared<kernel::Address>();
    MS_EXCEPTION_IF_NULL(workspace);
    workspace->addr = new float[size];
    MS_EXCEPTION_IF_NULL(workspace->addr);
    workspace->size = size;
    info->AddWorkspace(workspace);
  }
}

template <typename T>
AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type, const std::string &input_name,
                                                 void *ps_data, const Lengths &ps_lens,
                                                 const InputsShapePtr &inputs_shape) {
  MS_EXCEPTION_IF_NULL(ps_data);
  // Note that the data type may be inconsistent in ps_data.
  MS_LOG(INFO) << "Get input address pointer for optimizer:" << optim_type << ", input name:" << input_name;
  AddressPtr addr_ptr = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(addr_ptr);

  if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " is not supported.";
  }
  const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
  const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
  if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
    MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
  }
  size_t ps_index = ps_send_index_map.at(input_name);
  if (ps_index == INDEX_NOT_SEND) {
    MS_LOG(EXCEPTION) << "Input " << input_name << " is not supposed to be sent to PS.";
  }

  size_t addr_data_size, addr_data_offset;
  if (inputs_shape != nullptr) {
    // addr_data_size should be calculated by inputs_shape if it's passed.
    size_t origin_index = origin_input_map.at(input_name);
    EXC_IF_VEC_IDX_OOB((*inputs_shape), origin_index);
    MS_EXCEPTION_IF_NULL((*inputs_shape)[origin_index]);
    auto shape = *((*inputs_shape)[origin_index]);
    addr_data_size = SizeOf(shape) * worker_num_;
  } else {
    EXC_IF_VEC_IDX_OOB(ps_lens, ps_index);
    addr_data_size = IntToSize(ps_lens[ps_index]);
  }
  addr_data_offset =
    IntToSize(std::accumulate(ps_lens.begin(), ps_lens.begin() + SizeToInt(ps_index), 0, std::plus<int>()));

  // The size in ps_lens instead of addr_data_size is the size of real data.
  T *buffer = new T[addr_data_size];
  addr_ptr->size = IntToSize(ps_lens[ps_index]) * sizeof(T);
  addr_ptr->addr = buffer;

  size_t dst_size = addr_ptr->size;
  size_t src_size = addr_ptr->size;
  void *dst_data = addr_ptr->addr;
  void *src_data = reinterpret_cast<T *>(ps_data) + addr_data_offset;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
    delete[] buffer;
    buffer = nullptr;
    return nullptr;
  }
  return addr_ptr;
}

OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
                                                     const Lengths &lens, const InputsShapePtr &, size_t,
                                                     const std::shared_ptr<PServerKernel> &, bool) {
  MS_EXCEPTION_IF_NULL(weight);
  AddressPtr weight_addr = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(weight_addr);
  weight_addr->addr = weight->data();
  weight_addr->size = weight->size() * sizeof(float);

  AddressPtr accumulate = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(accumulate);

  accumulate->addr = new float[weight->size()];
  MS_EXCEPTION_IF_NULL(accumulate->addr);
  accumulate->size = sizeof(float) * weight->size();
  int64_t ret = memset_s(accumulate->addr, accumulate->size, 0x00, accumulate->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    delete[] reinterpret_cast<float *>(accumulate->addr);
    accumulate->addr = nullptr;
    return nullptr;
  }

  AddressPtr learning_rate = GenInputAddrPtr<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(learning_rate);
  AddressPtr gradient = GenInputAddrPtr<float>(kApplyMomentum, "grad", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(gradient);
  AddressPtr momentum = GenInputAddrPtr<float>(kApplyMomentum, "momentum", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(momentum);
  return new MomentumOptimInfo(weight_addr, accumulate, learning_rate, gradient, momentum);
}

OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
                                                       const Lengths &lens, const InputsShapePtr &inputs_shape, size_t,
                                                       const std::shared_ptr<PServerKernel> &, bool sharded) {
  AddressPtr weight_addr = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(weight_addr);
  weight_addr->addr = weight->data();
  weight_addr->size = weight->size() * sizeof(float);

  AddressPtr m = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(m);

  m->addr = new float[weight->size()];
  MS_EXCEPTION_IF_NULL(m->addr);
  m->size = weight->size() * sizeof(float);
  errno_t ret = memset_s(m->addr, m->size, 0x00, m->size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    delete[] reinterpret_cast<float *>(m->addr);
    m->addr = nullptr;
    return nullptr;
  }

  AddressPtr v = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(v);

  v->addr = new float[weight->size()];
  MS_EXCEPTION_IF_NULL(v->addr);
  v->size = weight->size() * sizeof(float);
  ret = memset_s(v->addr, v->size, 0x00, v->size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    delete[] reinterpret_cast<float *>(v->addr);
    v->addr = nullptr;
    delete[] reinterpret_cast<float *>(m->addr);
    m->addr = nullptr;
    return nullptr;
  }

  AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(beta1_power);
  AddressPtr beta2_power = GenInputAddrPtr<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(beta2_power);
  AddressPtr learning_rate = GenInputAddrPtr<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(learning_rate);
  AddressPtr beta1 = GenInputAddrPtr<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(beta1);
  AddressPtr beta2 = GenInputAddrPtr<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(beta2);
  AddressPtr epsilon = GenInputAddrPtr<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
  MS_EXCEPTION_IF_NULL(epsilon);
  AddressPtr grad = GenInputAddrPtr<float>(kSparseAdam, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
  MS_EXCEPTION_IF_NULL(grad);
  AddressPtr indices =
    GenInputAddrPtr<float>(kSparseAdam, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
  MS_EXCEPTION_IF_NULL(indices);
  return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon,
                                 grad, indices, sharded);
}

OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
                                                       const Lengths &lens, const InputsShapePtr &inputs_shape, size_t,
                                                       const std::shared_ptr<PServerKernel> &pserver_kernel,
                                                       bool sharded) {
  MS_EXCEPTION_IF_NULL(inputs_shape);
  AddressPtr weight_addr = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(weight_addr);
  weight_addr->addr = weight->data();
  weight_addr->size = weight->size() * sizeof(float);

  AddressPtr accum = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(accum);

  accum->addr = new float[weight->size()];
  MS_EXCEPTION_IF_NULL(accum->addr);
  accum->size = weight->size() * sizeof(float);
  for (size_t i = 0; i < weight->size(); i++) {
    float *tmp = reinterpret_cast<float *>(accum->addr);
    tmp[i] = std::dynamic_pointer_cast<SparseApplyFtrlPSKernelMod>(pserver_kernel)->init_accum();
  }

  AddressPtr linear = std::make_shared<kernel::Address>();
  MS_EXCEPTION_IF_NULL(linear);

  linear->addr = new float[weight->size()];
  MS_EXCEPTION_IF_NULL(linear->addr);
  linear->size = weight->size() * sizeof(float);
  errno_t ret = memset_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
    delete[] reinterpret_cast<float *>(linear->addr);
    linear->addr = nullptr;
    return nullptr;
  }

  AddressPtr grad = GenInputAddrPtr<float>(kSparseFtrl, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
  MS_EXCEPTION_IF_NULL(grad);
  AddressPtr indices =
    GenInputAddrPtr<float>(kSparseFtrl, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
  MS_EXCEPTION_IF_NULL(indices);
  return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices, sharded);
}
}  // namespace ps
}  // namespace mindspore
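Both GenInputAddrPtr above and UpdateOptimInputValue in the removed optimizer_info.cc locate one field inside the flat values buffer the worker sends: the per-field element counts live in a Lengths vector, and the byte offset of field k is the sum of the first k entries. A minimal sketch of that addressing scheme, with illustrative names and plain std types standing in for the deleted Values/Lengths aliases:

#include <cstddef>
#include <numeric>
#include <vector>

// Return a pointer to the start of field ps_index inside a flat float buffer
// whose layout is described by per-field element counts, mirroring the
// std::accumulate offset computation in GenInputAddrPtr.
const float *FieldStart(const std::vector<float> &values, const std::vector<int> &lens, size_t ps_index) {
  int offset = std::accumulate(lens.begin(), lens.begin() + static_cast<std::ptrdiff_t>(ps_index), 0);
  return values.data() + offset;
}

// Usage: with lens = {1, 4, 2}, FieldStart(values, lens, 2) points at element 5,
// i.e. past the 1-element "lr" field and the 4-element "grad" field.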
@@ -1,83 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_
#define MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_

#include <vector>
#include <memory>
#include <string>
#include "kernel/kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "ps/optimizer_info.h"

namespace mindspore {
namespace ps {
using mindspore::kernel::KernelMod;
using mindspore::kernel::ps::PServerKernel;
class OptimizerInfoBuilder {
 public:
  explicit OptimizerInfoBuilder(size_t worker_num) : worker_num_(worker_num) {}
  virtual ~OptimizerInfoBuilder() = default;

  OptimizerInfo *Build(const std::shared_ptr<PServerKernel> &pserver_kernel, const WeightPtr &weight, const Keys &keys,
                       const Values &values, const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
                       bool sharded);

  virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
                                     const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
                                     const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) = 0;

  virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
  virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}

 protected:
  template <typename T>
  AddressPtr GenInputAddrPtr(const std::string &optim_type, const std::string &input_name, void *ps_data,
                             const Lengths &lens, const InputsShapePtr &inputs_shape = nullptr);

  size_t worker_num_;
};

class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
 public:
  explicit MomentumOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
  ~MomentumOptimInfoBuilder() = default;
  OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
                             const InputsShapePtr &inputs_shape, size_t worker_num,
                             const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};

class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
 public:
  explicit SparseAdamOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
  ~SparseAdamOptimInfoBuilder() = default;
  OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
                             const InputsShapePtr &inputs_shape, size_t worker_num,
                             const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};

class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
 public:
  explicit SparseFtrlOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
  ~SparseFtrlOptimInfoBuilder() = default;
  OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
                             const InputsShapePtr &inputs_shape, size_t worker_num,
                             const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_
File diff suppressed because it is too large
@@ -1,244 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
#define MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_

#include <unistd.h>
#include <string>
#include <iostream>
#include <memory>
#include <vector>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <cmath>
#include <random>
#include <utility>
#include <list>
#include <map>
#include <functional>
#include <algorithm>

#include "utils/hash_map.h"
#include "ir/func_graph.h"
#include "backend/common/session/session_basic.h"
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "backend/common/session/session_factory.h"
#include "ps/optimizer_info.h"
#include "ps/optimizer_info_builder.h"
#include "ps/ps_context.h"
#include "plugin/device/cpu/hal/device/kernel_select_cpu.h"
#include "utils/ms_context.h"
#include "kernel/kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/apply_momentum_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/random_normal/random_normal.h"
#include "distributed/persistent/data.h"

#include "ps/constants.h"
#include "ps/util.h"
#include "ps/embedding_table_shard_metadata.h"
#include "utils/log_adapter.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/core/ps_server_node.h"
#include "ps/core/node.h"
#include "include/backend/visible.h"

namespace mindspore {
namespace ps {
class BACKEND_EXPORT ParameterServer {
 public:
  static ParameterServer &GetInstance();
  void Run(const FuncGraphPtr &func_graph);

 private:
  ParameterServer()
      : pserver_num_(0),
        worker_num_(0),
        grad_accum_count_(0),
        handler_(nullptr),
        func_graph_(nullptr),
        sess_(nullptr),
        running_(true),
        thread_(nullptr),
        persist_thread_(nullptr),
        server_node_(nullptr) {}
  ~ParameterServer() = default;
  ParameterServer(const ParameterServer &) = delete;
  ParameterServer &operator=(const ParameterServer &) = delete;

  class ServerHandler {
   public:
    explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
    ~ServerHandler() = default;
    void Init();
    void operator()(const std::shared_ptr<core::TcpConnection> &conn, const std::shared_ptr<core::MessageMeta> &meta,
                    const void *data, size_t size);
    void HandlePushReq(const void *data, size_t size, const VectorPtr &res);
    void HandlePullReq(const void *data, size_t size, const VectorPtr &res);
    void HandleInitWeights(const void *data, size_t size, const VectorPtr &res);
    void HandleInitWeightToOptimId(const void *data, size_t size, const VectorPtr &res);
    void HandleInitInputsShape(const void *data, size_t size, const VectorPtr &res);
    void HandleInitEmbeddings(const void *data, size_t size, const VectorPtr &res);
    void HandleCheckReadyForPush(const void *data, size_t size, const VectorPtr &res);
    void HandleCheckReadyForPull(const void *data, size_t size, const VectorPtr &res);
    void HandleEmbeddingLookup(const void *data, size_t size, const VectorPtr &res);
    void HandleUpdateEmbeddings(const void *data, size_t size, const VectorPtr &res);
    void HandleFinalize(const void *data, size_t size, const VectorPtr &res);

   private:
    ParameterServer *ps_;
    typedef void (ServerHandler::*RequestHandler)(const void *data, size_t size, const VectorPtr &res);
    mindspore::HashMap<int, RequestHandler> handlers_;
    mindspore::HashMap<int, std::string> commands_;
    mindspore::HashMap<Key, bool> init_weights_;
    mindspore::HashMap<Key, bool> init_weight_to_optim_;
    mindspore::HashMap<Key, bool> init_optim_info_;
  };

  // For disaster recovery, you can customize the key-value structure that needs to be persisted, and you can customize
  // the business layer disaster recovery function.
  class RecoverHandler {
   public:
    explicit RecoverHandler(ParameterServer *ps) : ps_(ps) {}
    ~RecoverHandler() = default;

    // Initialize the storage module; file storage is currently used.
    void Init();

    // Do disaster recovery.
    void Recover();

    core::FileConfiguration *config_storage() const { return storage_.get(); }

   private:
    // Load embedding information from persistent storage to recover the embedding table.
    void RecoverEmbedding();

    ParameterServer *ps_;
    typedef void (RecoverHandler::*RecoverFunc)();
    mindspore::HashMap<std::string, RecoverFunc> handlers_;
    std::unique_ptr<core::FileConfiguration> storage_{nullptr};
  };

  bool Init(const FuncGraphPtr &func_graph);
  void InitOptimInfoBuilders();
  void InitWeightKeyToOptims(const Key &key, const int64_t &optim_id);
  void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
  void InitWeight(const Key &key, const WeightPtr &weight);
  void InitGrad(const Key &key, const GradPtr &grad);
  void InitEmbeddingTable(const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
                          const ParamInitInfo &param_init_info);
  bool HasWeight(const Key &key);
  void Finalize();
  void UpdateWeights();
  void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
  WeightPtr weight(const Key &key);
  void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, KVMessage *res);
  void UpdateEmbeddings(const Key &key, const LookupIds &lookup_ids, const Values &vals);
  inline bool ReadyForUpdateWeights() const;
  inline bool ReadyForPush(const Key &key);
  inline bool ReadyForPull(const Key &key);
  inline void ResetGradAccumCount();
  const CNodePtr GetCNode(const std::string &name) const;
  inline std::mutex &mutex();
  void GetEmbeddingTableParamPtr();
  void SyncEmbeddingTables();
  // Cache embedding table parameter by map, key: parameter name, value: parameter node pointer
  void CacheEmbeddingTableParamPtr();

  // Whether disaster recovery is enabled.
  bool EnableRecovery() const;

  // Persist weight periodically, triggered by the scheduler.
  void PersistParameters();

  // Persist sparse network operators when the init embedding table message is received.
  void PersistKernels(const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
                      const ParamInitInfo &param_init_info) const;

  // Persist parameters stored in the parameter server when the init message is received.
  void PersistInitParameters(const Key &key, const WeightPtr &param);

  // Restore sparse network operators and parameters.
  void RecoverEmbedding(const std::vector<Key> &keys, const std::vector<std::vector<ShapeVector>> &shapes_list,
                        const std::vector<std::string> &param_names);

  // Restore sparse network operators.
  void RecoverKernels(const std::vector<Key> &keys, const std::vector<std::vector<ShapeVector>> &shapes_list,
                      const std::vector<std::string> &param_names);

  // Restore parameters stored in the parameter server.
  void RecoverParameters(const std::vector<Key> &keys);

  // Update the indices of the modified part of the persistent parameter.
  void UpdateDirtyInfo(const Key &key, const LookupIds &lookup_ids, int64_t offset);

  // Set the current persistent state to the server node.
  void set_persistent_state(core::PersistentState persistent_state) const;

  std::unique_ptr<RecoverHandler> recover_handler_;
  std::atomic_bool finish_recovery_{false};

  size_t pserver_num_;
  size_t worker_num_;
  size_t grad_accum_count_;
  std::unique_ptr<ServerHandler> handler_;
  FuncGraphPtr func_graph_;
  std::shared_ptr<session::SessionBasic> sess_;
  bool running_;
  bool embedding_param_ptr_cached_{false};
  // Used to cache embedding table parameter, key: parameter name, value: parameter node pointer
  mindspore::HashMap<std::string, ParameterPtr> embedding_parameter_tables_;
  // Used to cache the modified part of the parameter.
  mindspore::HashMap<Key, distributed::storage::DirtyInfo> weights_dirty_info_;

  mindspore::HashMap<Key, std::shared_ptr<PServerKernel>> optimizers_;
  mindspore::HashMap<Key, InputsShapePtr> optim_inputs_shape_;
  mindspore::HashMap<Key, InputsShapePtr> original_optim_inputs_shape_;
  mindspore::HashMap<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
  mindspore::HashMap<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
  mindspore::HashMap<Key, std::string> weight_key_to_optims_;
  mindspore::HashMap<Key, std::string> weight_key_to_optim_op_;
  mindspore::HashMap<Key, WeightPtr> weights_;
  mindspore::HashMap<Key, bool> is_embedding_;
  mindspore::HashMap<Key, GradPtr> grads_;
  mindspore::HashMap<Key, size_t> grads_accum_counter_;
  mindspore::HashMap<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
  mindspore::HashMap<Key, uint64_t> tokens_;

  std::mutex mutex_;
  std::condition_variable apply_grads_cv_;

  std::mutex access_weight_mutex_;
  std::unique_ptr<std::thread> thread_;
  std::unique_ptr<std::thread> persist_thread_;
  std::shared_ptr<core::PSServerNode> server_node_;
  std::map<Key, ParameterPtr> embedding_tables_;

  friend class ServerHandler;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
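The removed ServerHandler above routes every incoming command through a HashMap<int, RequestHandler> of pointers to member functions, filled in by Init(). A minimal self-contained sketch of that dispatch idiom, with hypothetical command IDs and handler names, using std::unordered_map in place of mindspore::HashMap:

#include <cstddef>
#include <iostream>
#include <unordered_map>

class Handler {
 public:
  // Pointer-to-member-function type, mirroring ServerHandler::RequestHandler.
  typedef void (Handler::*Fn)(const void *data, size_t size);

  Handler() {
    // Register handlers by command id, as ServerHandler::Init() did.
    handlers_[1] = &Handler::HandlePush;
    handlers_[2] = &Handler::HandlePull;
  }

  // Look up the command and invoke the bound member function on this object.
  void Dispatch(int cmd, const void *data, size_t size) {
    auto it = handlers_.find(cmd);
    if (it != handlers_.end()) {
      (this->*(it->second))(data, size);
    }
  }

 private:
  void HandlePush(const void *, size_t size) { std::cout << "push " << size << " bytes\n"; }
  void HandlePull(const void *, size_t size) { std::cout << "pull " << size << " bytes\n"; }
  std::unordered_map<int, Fn> handlers_;
};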
@@ -1,108 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/ps_cache/embedding_hash_map.h"

namespace mindspore {
namespace ps {
int EmbeddingHashMap::ParseData(const int id, int *const swap_out_index, int *const swap_out_ids,
                                const size_t data_step, const size_t graph_running_step, size_t *const swap_out_size,
                                bool *const need_wait_graph) {
  MS_EXCEPTION_IF_NULL(swap_out_index);
  MS_EXCEPTION_IF_NULL(swap_out_ids);
  MS_EXCEPTION_IF_NULL(swap_out_size);
  bool need_swap = false;
  auto hash_index = FindInsertionPos(data_step, graph_running_step, &need_swap, need_wait_graph);
  if (hash_index == INVALID_INDEX_VALUE) {
    return hash_index;
  }

  if (!need_swap) {
    hash_count_++;
    (void)hash_id_to_index_.emplace(id, hash_index);
    hash_map_elements_[hash_index].set_id(id);
    hash_map_elements_[hash_index].set_step(data_step);
    return hash_index;
  }

  swap_out_index[*swap_out_size] = hash_index;
  swap_out_ids[*swap_out_size] = hash_map_elements_[hash_index].id_;
  (*swap_out_size)++;
  (void)hash_id_to_index_.erase(hash_map_elements_[hash_index].id_);
  (void)hash_id_to_index_.emplace(id, hash_index);
  hash_map_elements_[hash_index].set_id(id);
  hash_map_elements_[hash_index].set_step(data_step);
  return hash_index;
}

int EmbeddingHashMap::FindInsertionPos(const size_t, const size_t graph_running_step, bool *const need_swap,
                                       bool *const need_wait_graph) {
  MS_EXCEPTION_IF_NULL(need_swap);
  MS_EXCEPTION_IF_NULL(need_wait_graph);
  int hash_index = INVALID_INDEX_VALUE;
  while (!expired_element_full_) {
    if (hash_map_elements_[current_pos_].IsEmpty()) {
      hash_index = current_pos_;
      hash_count_++;
    } else if (hash_map_elements_[current_pos_].IsExpired(graph_running_step)) {
      hash_index = current_pos_;
      *need_swap = true;
    } else if (hash_map_elements_[current_pos_].IsStep(graph_running_step)) {
      graph_running_index_[graph_running_index_num_++] = current_pos_;
    }
    current_pos_ = (current_pos_ + 1) % hash_capacity_;
    if (hash_index != INVALID_INDEX_VALUE) {
      return hash_index;
    }
    if (current_pos_ == current_batch_start_pos_) {
      expired_element_full_ = true;
      MS_LOG(INFO) << "Running step:" << graph_running_step << "(num:" << graph_running_index_num_
                   << ") will be used, index swap will wait until the graph has completed.";
    }
  }

  if (graph_running_index_pos_ != graph_running_index_num_) {
    *need_swap = true;
    *need_wait_graph = true;
    return graph_running_index_[graph_running_index_pos_++];
  }
  return INVALID_INDEX_VALUE;
}

void EmbeddingHashMap::DumpHashMap() {
  MS_LOG(INFO) << "Dump hash map info begin, hash_capacity: " << hash_capacity_ << " hash_count: " << hash_count_;
  MS_LOG(INFO) << "Dump hash_id_to_index: ";
  for (auto iter = hash_id_to_index_.begin(); iter != hash_id_to_index_.end(); ++iter) {
    MS_LOG(INFO) << " id: " << iter->first << " index: " << iter->second;
  }
  MS_LOG(INFO) << "Dump hash_map_unit: ";
  for (size_t i = 0; i < hash_map_elements_.size(); i++) {
    if (!hash_map_elements_[i].IsEmpty()) {
      MS_LOG(INFO) << " index: " << i << " id: " << hash_map_elements_[i].id_
                   << " step: " << hash_map_elements_[i].step_;
    }
  }
  MS_LOG(INFO) << "Dump hash map info end.";
}

void EmbeddingHashMap::Reset() {
  current_batch_start_pos_ = current_pos_;
  graph_running_index_num_ = 0;
  graph_running_index_pos_ = 0;
  expired_element_full_ = false;
}
}  // namespace ps
}  // namespace mindspore
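The eviction policy being removed here is a ring scan: FindInsertionPos walks the fixed-capacity table from the current cursor, reuses an empty slot if it finds one, otherwise evicts the first slot whose step is older than the running graph step, and reports "full" when it wraps around to the batch start. A stripped-down standalone sketch of that scan, with hypothetical names and step 0 standing in for INVALID_STEP_VALUE:

#include <cstddef>
#include <vector>

// Simplified ring scan over a fixed-capacity slot table.
// steps[i] == 0 means the slot is empty; steps[i] < graph_running_step means expired.
// Returns the chosen slot and advances *pos past it; -1 means every slot is still live.
int FindSlot(std::vector<size_t> *steps, size_t *pos, size_t graph_running_step) {
  size_t capacity = steps->size();
  for (size_t scanned = 0; scanned < capacity; ++scanned) {
    size_t i = (*pos + scanned) % capacity;
    if ((*steps)[i] == 0 || (*steps)[i] < graph_running_step) {  // empty or expired
      *pos = (i + 1) % capacity;  // keep scanning from just past the reused slot
      return static_cast<int>(i);
    }
  }
  return -1;  // table full: the caller must wait for the running graph, as ParseData does
}

The real FindInsertionPos additionally remembers slots pinned by the in-flight step (graph_running_index_) so they can be handed out once the graph finishes; the sketch omits that second phase.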
@ -1,89 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_
|
||||
#define MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_
|
||||
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "utils/hash_map.h"
|
||||
#include "utils/convert_utils_base.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace ps {
|
||||
static const size_t INVALID_STEP_VALUE = 0;
|
||||
static const int INVALID_INDEX_VALUE = -1;
|
||||
|
||||
struct HashMapElement {
|
||||
int id_{INVALID_INDEX_VALUE};
|
||||
size_t step_{INVALID_STEP_VALUE};
|
||||
bool IsEmpty() const { return step_ == INVALID_STEP_VALUE; }
|
||||
bool IsExpired(size_t graph_running_step) const { return graph_running_step > step_; }
|
||||
bool IsStep(size_t step) const { return step_ == step; }
|
||||
void set_id(int id) { id_ = id; }
|
||||
void set_step(size_t step) { step_ = step; }
|
||||
};
|
||||
|
||||
// Hash table is held in device, HashMap is used to manage hash table in host.
|
||||
class EmbeddingHashMap {
|
||||
public:
|
||||
EmbeddingHashMap(size_t hash_count, size_t hash_capacity)
|
||||
: hash_count_(hash_count),
|
||||
hash_capacity_(hash_capacity),
|
||||
current_pos_(0),
|
||||
current_batch_start_pos_(0),
|
||||
graph_running_index_num_(0),
|
||||
graph_running_index_pos_(0),
|
||||
expired_element_full_(false) {
|
||||
hash_map_elements_.resize(hash_capacity);
|
||||
// In multi-device mode, embedding table are distributed on different devices by ID interval,
|
||||
// and IDs outside the range of local device will use the front and back positions of the table,
|
||||
// the positions are reserved for this.
|
||||
hash_map_elements_.front().set_step(SIZE_MAX);
|
||||
hash_map_elements_.back().set_step(SIZE_MAX);
|
||||
graph_running_index_ = std::make_unique<int[]>(hash_capacity);
|
||||
}
|
||||
virtual ~EmbeddingHashMap() = default;
|
||||
int ParseData(const int id, int *const swap_out_index, int *const swap_out_ids, const size_t data_step,
|
||||
const size_t graph_running_step, size_t *const swap_out_size, bool *const need_wait_graph);
|
||||
size_t hash_step(const int hash_index) const { return hash_map_elements_[IntToSize(hash_index)].step_; }
|
||||
void set_hash_step(const int hash_index, const size_t step) {
|
||||
hash_map_elements_[IntToSize(hash_index)].set_step(step);
|
||||
}
|
||||
const mindspore::HashMap<int, int> &hash_id_to_index() const { return hash_id_to_index_; }
|
||||
size_t hash_capacity() const { return hash_capacity_; }
|
||||
void DumpHashMap();
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
int FindInsertionPos(const size_t data_step, const size_t graph_running_step, bool *const need_swap,
|
||||
bool *const need_wait_graph);
|
||||
size_t hash_count_;
|
||||
size_t hash_capacity_;
|
||||
std::vector<HashMapElement> hash_map_elements_;
|
||||
mindspore::HashMap<int, int> hash_id_to_index_;
|
||||
size_t current_pos_;
|
||||
size_t current_batch_start_pos_;
|
||||
size_t graph_running_index_num_;
|
||||
size_t graph_running_index_pos_;
|
||||
std::unique_ptr<int[]> graph_running_index_;
|
||||
bool expired_element_full_;
|
||||
};
|
||||
} // namespace ps
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_
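
For orientation, a minimal sketch of how a host-side map like this was driven per batch. Only ParseData, Reset, INVALID_INDEX_VALUE, and the (0, capacity) constructor come from the header above; the loop, the step counters, and the buffer names are illustrative assumptions, not code from this repository:

// Hypothetical per-batch driver (assumptions marked; not repository code).
EmbeddingHashMap hash_map(0, 1024);              // hash_count 0 matches how callers construct it
std::vector<int> batch_ids = {/* ids for this batch */};
std::vector<int> swap_out_index(batch_ids.size());  // device slots to evict first
std::vector<int> swap_out_ids(batch_ids.size());    // ids currently owning those slots
size_t data_step = 1, graph_running_step = 0, swap_out_size = 0;
bool need_wait_graph = false;
for (int id : batch_ids) {
  int slot = hash_map.ParseData(id, swap_out_index.data(), swap_out_ids.data(), data_step,
                                graph_running_step, &swap_out_size, &need_wait_graph);
  if (slot == INVALID_INDEX_VALUE) {
    break;  // assumed failure code: every slot still in use by the running step, caller must wait
  }
}
hash_map.Reset();  // start swap bookkeeping for the next batch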

@ -1,47 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H

#include <utility>
#include <memory>

namespace mindspore {
namespace ps {
class PsCacheBasic {
 public:
  PsCacheBasic() = default;
  virtual ~PsCacheBasic() = default;
  virtual bool InitDevice(uint32_t device_id, const void *context) = 0;
  virtual void *MallocMemory(size_t size) = 0;
  virtual bool MallocConstantMemory(size_t) { return true; }
  virtual void FreeMemory(void *buf) = 0;
  virtual bool RecordEvent() = 0;
  virtual bool SynchronizeEvent() = 0;
  virtual bool SynchronizeStream() = 0;
  virtual bool CopyHostMemToDevice(void *dst, const void *src, size_t size) = 0;
  virtual bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) = 0;
  virtual bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
                           size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) = 0;
  virtual bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
                          size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) = 0;

 protected:
  void *stream_;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H
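
To show the contract this interface imposed on a backend, here is a skeletal, purely hypothetical implementation; the class name and the trivial bodies are assumptions for illustration, while a real backend wrapped its device runtime's stream, events, copies, and the two swap kernels:

// Hypothetical no-op backend (illustrative only).
class NoopPsCache : public PsCacheBasic {
 public:
  bool InitDevice(uint32_t, const void *) override { stream_ = nullptr; return true; }
  void *MallocMemory(size_t size) override { return malloc(size); }
  void FreeMemory(void *buf) override { free(buf); }
  bool RecordEvent() override { return true; }
  bool SynchronizeEvent() override { return true; }
  bool SynchronizeStream() override { return true; }
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override {
    return memcpy_s(dst, size, src, size) == EOK;  // host-to-host stand-in for a device copy
  }
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override {
    return memcpy_s(dst, size, src, size) == EOK;
  }
  bool HashSwapOut(void *, void *, void *, size_t, size_t, size_t) override { return true; }
  bool HashSwapIn(void *, void *, void *, size_t, size_t, size_t) override { return true; }
};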

@ -1,42 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/ps_cache/ps_cache_factory.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace ps {
PsCacheFactory &PsCacheFactory::Get() {
  static PsCacheFactory instance;
  return instance;
}

void PsCacheFactory::Register(const std::string &device_name, PsCacheCreator &&ps_cache_creator) {
  if (ps_cache_creators_.end() == ps_cache_creators_.find(device_name)) {
    (void)ps_cache_creators_.emplace(device_name, ps_cache_creator);
  }
}

std::shared_ptr<PsCacheBasic> PsCacheFactory::ps_cache(const std::string &device_name) {
  auto iter = ps_cache_creators_.find(device_name);
  if (ps_cache_creators_.end() != iter) {
    MS_EXCEPTION_IF_NULL(iter->second);
    return (iter->second)();
  }
  return nullptr;
}
}  // namespace ps
}  // namespace mindspore

@ -1,57 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "ps/ps_cache/ps_cache_basic.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace ps {
using PsCacheCreator = std::function<std::shared_ptr<PsCacheBasic>()>;
class PsCacheFactory {
 public:
  static PsCacheFactory &Get();
  void Register(const std::string &device_name, PsCacheCreator &&ps_cache_creator);
  std::shared_ptr<PsCacheBasic> ps_cache(const std::string &device_name);

 private:
  PsCacheFactory() = default;
  ~PsCacheFactory() = default;
  DISABLE_COPY_AND_ASSIGN(PsCacheFactory)
  std::map<std::string, PsCacheCreator> ps_cache_creators_;
};

class PsCacheRegistrar {
 public:
  PsCacheRegistrar(const std::string &device_name, PsCacheCreator &&ps_cache_creator) {
    PsCacheFactory::Get().Register(device_name, std::move(ps_cache_creator));
  }
  ~PsCacheRegistrar() = default;
};

#define MS_REG_PS_CACHE(DEVICE_NAME, PS_CACHE_CLASS)                          \
  static const PsCacheRegistrar g_ps_cache_registrar__##DEVICE_NAME##_##_reg( \
    DEVICE_NAME, []() { return std::make_shared<PS_CACHE_CLASS>(); });
}  // namespace ps
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_
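
A backend registered itself through the MS_REG_PS_CACHE macro above at static-initialization time, and device-agnostic code then resolved the cache by device target. A hedged usage sketch, assuming some concrete PsCacheBasic subclass was registered elsewhere under the name "GPU":

// Registration happens in the backend's own translation unit via
// MS_REG_PS_CACHE(device_name, ConcreteCacheClass); here we only resolve it.
std::shared_ptr<PsCacheBasic> cache = PsCacheFactory::Get().ps_cache("GPU");
if (cache == nullptr) {
  MS_LOG(ERROR) << "No ps cache implementation registered for this device target.";
}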

File diff suppressed because it is too large
@ -1,218 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_

#include <map>
#include <string>
#include <vector>
#include <thread>
#include <mutex>
#include <atomic>
#include <utility>
#include <memory>
#include <condition_variable>
#include "utils/ms_context.h"
#include "kernel/kernel.h"
#include "utils/shape_utils.h"
#include "ir/tensor.h"
#include "ps/constants.h"
#include "ps/worker.h"
#include "ps/ps_context.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_cache/embedding_hash_map.h"
#include "ps/ps_cache/ps_cache_factory.h"
#include "include/backend/visible.h"

namespace mindspore {
namespace ps {
constexpr size_t kHostCacheScaleFactor = 10;
constexpr size_t kMaxThreadNum = 16;
constexpr size_t kMaxIdsPerThread = 10000;
using mindspore::kernel::Address;

struct HashTableInfo {
  size_t cache_vocab_size{0};
  size_t host_cache_vocab_size{0};
  size_t embedding_size{0};
  size_t vocab_size{0};
  Address device_address{nullptr, 0};
  std::shared_ptr<float> host_address{nullptr};
  ParamInitInfo param_init_info_;
  int32_t param_key_{-1};
};

struct EmbeddingDeviceCache {
  EmbeddingDeviceCache(size_t batch_elements, size_t cache_vocab_size)
      : hash_swap_index_addr_(nullptr), hash_swap_value_addr_(nullptr) {
    device_to_host_index = std::make_unique<int[]>(batch_elements);
    device_to_host_ids = std::make_unique<int[]>(batch_elements);
    host_to_device_index = std::make_unique<int[]>(batch_elements);
    host_to_device_ids = std::make_unique<int[]>(batch_elements);
    device_hash_map_ = std::make_shared<EmbeddingHashMap>(0, cache_vocab_size);
    auto context_ptr = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(context_ptr);
    auto device_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
    cache_ = PsCacheFactory::Get().ps_cache(device_target);
  }
  std::unique_ptr<int[]> device_to_host_index;
  std::unique_ptr<int[]> device_to_host_ids;
  std::unique_ptr<int[]> host_to_device_index;
  std::unique_ptr<int[]> host_to_device_ids;
  int *hash_swap_index_addr_;
  float *hash_swap_value_addr_;
  std::shared_ptr<EmbeddingHashMap> device_hash_map_;
  std::shared_ptr<PsCacheBasic> cache_;
};

struct EmbeddingHostCache {
  EmbeddingHostCache(size_t batch_elements, size_t host_cache_vocab_size) {
    host_to_server_index = std::make_unique<int[]>(batch_elements);
    host_to_server_ids = std::make_unique<int[]>(batch_elements);
    server_to_host_index = std::make_unique<int[]>(batch_elements);
    server_to_host_ids = std::make_unique<int[]>(batch_elements);
    host_to_device_index = std::make_unique<int[]>(batch_elements);
    device_to_host_index = std::make_unique<int[]>(batch_elements);
    host_hash_map_ = std::make_shared<EmbeddingHashMap>(0, host_cache_vocab_size);
  }
  std::unique_ptr<int[]> host_to_server_index;
  std::unique_ptr<int[]> host_to_server_ids;
  std::unique_ptr<int[]> server_to_host_index;
  std::unique_ptr<int[]> server_to_host_ids;
  std::unique_ptr<int[]> host_to_device_index;
  std::unique_ptr<int[]> device_to_host_index;
  std::shared_ptr<EmbeddingHashMap> host_hash_map_;
};

struct PsCacheStatisticsInfo {
  size_t batch_id_count_{0};
  size_t batch_id_unique_count_{0};
  size_t device_to_host_size_{0};
  size_t host_to_device_size_{0};
  size_t host_to_server_size_{0};
  size_t server_to_host_size_{0};
  size_t hash_hit_count_{0};
  size_t mem_cache_swap_out_size_{0};
  size_t mem_cache_swap_in_size_{0};
  size_t mem_cache_hit_count_{0};
};

class BACKEND_EXPORT PsCacheManager {
 public:
  static PsCacheManager &GetInstance();
  void Initialize();
  void InsertHashTableSize(const std::string &param_name, size_t cache_vocab_size, size_t embedding_size,
                           size_t vocab_size);
  void InsertWeightInitInfo(const std::string &param_name, size_t global_seed, size_t op_seed);
  void InsertAccumuInitInfo(const std::string &param_name, float init_val);
  void ReInsertHashTableSize(const std::string &new_param_name, const std::string &cur_param_name,
                             size_t cache_vocab_size, size_t embedding_size);
  void CloneHashTable(const std::string &dest_param_name, const std::string &src_param_name);
  const Address &QueryHashTableAddr(const std::string &param_name) const;
  const size_t &QueryHashTableSize(const std::string &param_name) const;
  bool IsHashTable(const std::string &param_name) { return hash_tables_.count(param_name) != 0; }
  void set_batch_elements(size_t batch_elements) { batch_elements_ = batch_elements; }
  void set_rank_id(uint32_t rank_id) { rank_id_ = rank_id; }
  bool initialized_ps_cache() const { return initialized_ps_cache_; }
  size_t vocab_cache_size() const { return vocab_cache_size_; }
  int cache_indices_lower_bound() const;
  void DoProcessData(uint32_t device_id, const void *context);
  void IncreaseGraphStep(const std::string &channel_name);
  void SyncEmbeddingTable();
  void Finalize();
  void DumpHashTables(bool dump_device_tables = false) const;

 private:
  PsCacheManager() = default;
  ~PsCacheManager() = default;
  PsCacheManager(const PsCacheManager &) = delete;
  PsCacheManager &operator=(const PsCacheManager &) = delete;
  bool IncreaseStep();
  void set_current_graph_step() { graph_running_step_ = graph_step_; }
  std::string channel_name();
  void set_channel_name(const std::string channel_name);
  bool InitParameterServer();
  void InitDataChannel();
  void AllocMemForHashTable();
  void SetLocalIdRank();
  void ProcessDataTask(uint32_t device_id, const void *context);
  bool ProcessData();
  bool ParseData(const int *batch_ids, const size_t batch_ids_len, int *hash_index);
  bool WaitGraphRun();
  bool ParseDeviceData(size_t id, bool *need_swap_device_to_host, bool *need_swap_host_to_device, int *hash_index);
  bool ParseHostDataHostToDevice(size_t id);
  bool ParseHostDataDeviceToHost();
  bool HashSwapDeviceOut(int *swap_out_index, std::vector<float> *swap_out_data, const HashTableInfo &hash_info);
  bool HashSwapDeviceIn(const int *swap_in_ids, const int *swap_in_index, const HashTableInfo &hash_info, size_t key);
  bool HashSwapHostToDevice(const HashTableInfo &hash_info);
  bool HashSwapDeviceToHost(const HashTableInfo &hash_info);
  bool HashSwapHostToServer(size_t key, const HashTableInfo &hash_info);
  bool HashSwapServerToHost(size_t key, const HashTableInfo &hash_info);
  bool InsertHostHashTable(size_t embedding_size, size_t insert_indices_size, const int *insert_indices,
                           const float *insert_data, float *hash_table_addr);
  bool LookUpHostHashTable(size_t embedding_size, size_t indices_lens, const float *hash_table_addr,
                           const int *indices_addr, float *output_addr);
  bool UpdataEmbeddingTable(const std::vector<float> &swap_out_data, int *const swap_out_ids, size_t key);
  void LookUpTableTask(size_t indices_lens, size_t outer_dim_size, size_t first_dim_size, const float *input_addr,
                       const int *indices_addr, float *output_addr);
  bool CheckFinishInsertInitInfo() const;
  void AddEmbeddingTable() const;
  void DumpStatisticsInfo(size_t each_print_step = 1000);
  bool SyncHostEmbeddingTable();
  bool SyncDeviceEmbeddingTable();
  bool CheckCacheHitOrOutRangeTask(const int *batch_ids, const size_t batch_ids_len, int *hash_index, bool *in_device,
                                   bool *out_range, size_t *hash_hit_count);
  bool CheckCacheHitOrOutRange(const int *batch_ids, const size_t batch_ids_len, int *hash_index, bool *in_device,
                               bool *out_range);
  bool ResetEmbeddingHashMap();

  bool initialized_ps_cache_{false};
  std::string channel_name_;
  std::mutex channel_mutex_;
  std::atomic_ulong graph_step_{0};
  size_t graph_running_step_{0};
  size_t data_step_{0};
  std::mutex data_mutex_;
  std::condition_variable data_prase_;
  std::condition_variable insert_init_info_;
  std::thread process_data_thread_;

  std::map<std::string, HashTableInfo> hash_tables_;
  std::shared_ptr<EmbeddingDeviceCache> embedding_device_cache_;
  std::shared_ptr<EmbeddingHostCache> embedding_host_cache_;

  size_t vocab_size_{0};
  size_t vocab_cache_size_{0};
  size_t host_vocab_cache_size_{0};
  size_t batch_elements_{0};
  PsCacheStatisticsInfo statistics_info_;
  std::pair<int, int> emb_table_slice_bounds_;
  std::pair<int, int> cache_indices_bounds_;
  int vocab_cache_size_diff_{0};
  uint32_t rank_id_{0};
  std::atomic_bool finish_insert_init_info_{false};
  std::atomic_bool finish_init_parameter_server_{false};
  std::atomic_bool running_{false};
  std::atomic_bool finalized_{false};
  bool finish_embedding_table_sync_{false};
  bool device_need_wait_graph_{false};
  bool host_need_wait_graph_{false};
};

static PsCacheManager &ps_cache_instance = PsCacheManager::GetInstance();
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_
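
From the declarations above, the worker-side lifecycle of this manager was roughly the following. The call order is inferred from the method names and the device/host cache structures, not quoted from the removed ps_cache_manager.cc (whose diff is suppressed above), and the argument values are placeholders:

// Inferred orientation sketch only; do not read this as the removed implementation.
auto &cache = PsCacheManager::GetInstance();
cache.InsertHashTableSize("embedding_table.0", cache_vocab_size, embedding_size, vocab_size);
cache.Initialize();                       // allocate device/host caches, init the parameter server
cache.DoProcessData(device_id, context);  // spawn ProcessDataTask: ParseData -> HashSwap* per batch
cache.IncreaseGraphStep("data_channel");  // advance the step that expiry checks compare against
cache.SyncEmbeddingTable();               // flush cached rows back to the servers
cache.Finalize();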

@ -21,7 +21,6 @@
#include "utils/ms_utils.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "distributed/cluster/cluster_context.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
#else

@ -98,7 +97,6 @@ void PSContext::Reset() {
  is_sched_ = false;
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
  if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
    ps_cache_instance.Finalize();
    set_cache_enable(false);
  }
#endif

@ -166,7 +164,6 @@ void PSContext::InsertHashTableSize(const std::string &param_name, size_t cache_
    embedding_cache_table_manager.InsertHashTableSize(param_name, cache_vocab_size, embedding_size, vocab_size,
                                                      param_key);
  }
  ps_cache_instance.InsertHashTableSize(param_name, cache_vocab_size, embedding_size, vocab_size);
#endif
}

@ -177,21 +174,12 @@ void PSContext::ReInsertHashTableSize(const std::string &new_param_name, const s
    embedding_cache_table_manager.ReInsertHashTableSize(new_param_name, cur_param_name, cache_vocab_size,
                                                        embedding_size);
  }
  ps_cache_instance.ReInsertHashTableSize(new_param_name, cur_param_name, cache_vocab_size, embedding_size);
#endif
}

void PSContext::InsertWeightInitInfo(const std::string &param_name, size_t global_seed, size_t op_seed) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
  ps_cache_instance.InsertWeightInitInfo(param_name, global_seed, op_seed);
#endif
}
void PSContext::InsertWeightInitInfo(const std::string &, size_t, size_t) const { return; }

void PSContext::InsertAccumuInitInfo(const std::string &param_name, float init_val) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
  ps_cache_instance.InsertAccumuInitInfo(param_name, init_val);
#endif
}
void PSContext::InsertAccumuInitInfo(const std::string &, float) const { return; }

void PSContext::CloneHashTable(const std::string &dest_param_name, int32_t dest_param_key,
                               const std::string &src_param_name, int32_t src_param_key) const {

@ -199,7 +187,6 @@ void PSContext::CloneHashTable(const std::string &dest_param_name, int32_t dest_
  if (enable_distributed_mindrt()) {
    embedding_cache_table_manager.CloneHashTable(dest_param_name, dest_param_key, src_param_name, src_param_key);
  }
  ps_cache_instance.CloneHashTable(dest_param_name, src_param_name);
#endif
}

@ -216,11 +203,7 @@ bool PSContext::cache_enable() const {
  return false;
}

void PSContext::set_rank_id(uint32_t rank_id) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
  ps_cache_instance.set_rank_id(rank_id);
#endif
}
void PSContext::set_rank_id(uint32_t) const { return; }

void PSContext::set_server_mode(const std::string &server_mode) {
  if (server_mode != kServerModePS && server_mode != kServerModeFL && server_mode != kServerModeHybrid) {

@ -1,48 +0,0 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/random_normal/random_normal.h"
#include <random>
#include "include/common/random.h"
#include "utils/log_adapter.h"

namespace mindspore::ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
                      float *output_data) {
  // Check the output data pointer.
  if (output_data == nullptr) {
    MS_LOG(ERROR) << "output data is null.";
    return false;
  }
  // Check the shape.
  if (out_shape.size() == 0) {
    MS_LOG(ERROR) << "output data shape is empty.";
    return false;
  }
  // Calculate the data size from the shape.
  size_t data_size = 1;
  for (size_t i = 0; i < out_shape.size(); ++i) {
    data_size *= out_shape[i];
  }
  // Generate random values in parallel.
  constexpr int seed_shift = 32;
  const uint64_t seed = (global_seed << seed_shift) + op_seed;
  using Generator = random::Philox;
  using Distribution = random::NormalDistribution<float>;
  random::GenerateRandomsParallel<float, Generator, Distribution>(seed, output_data, data_size, mean, stddev);
  return true;
}
}  // namespace mindspore::ps
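
The seed composition above packs both seeds into a single 64-bit value, so distinct (global_seed, op_seed) pairs map to distinct generator seeds as long as op_seed fits in the low 32 bits. A quick worked example with arbitrary values:

// global_seed = 2, op_seed = 5:
//   (2 << 32) = 0x0000000200000000 = 8589934592
//   + 5       = 0x0000000200000005 = 8589934597
uint64_t seed = (static_cast<uint64_t>(2) << 32) + 5;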

@ -1,27 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_
#define MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_
#include <vector>

namespace mindspore {
namespace ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
                      float *output_data);
}  // namespace ps
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_

@ -1,971 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/worker.h"
#include "pipeline/jit/pipeline.h"

namespace mindspore {
namespace ps {
namespace {
constexpr int kRetryDuration = 2000;
}  // namespace

Worker &Worker::GetInstance() {
  static Worker instance{};
  return instance;
}

void Worker::Run() {
  std::lock_guard<std::mutex> lock(running_mutex_);

  server_num_ = PSContext::instance()->initial_server_num();
  if (running_) {
    MS_LOG(INFO) << "Worker is already running.";
    return;
  }
  if (!PSContext::instance()->is_worker()) {
    MS_LOG(EXCEPTION) << "The role is not worker.";
  }

  Initialize();

  worker_node_.RegisterEventCallback(core::ClusterEvent::SCHEDULER_TIMEOUT, [this]() {
    MS_LOG(ERROR) << "Trigger timeout event: SCHEDULER_TIMEOUT, begin to exit the system!";
    this->Finalize();
    exit(0);
  });
  worker_node_.RegisterEventCallback(core::ClusterEvent::NODE_TIMEOUT, [this]() {
    MS_LOG(ERROR) << "Trigger timeout event: NODE_TIMEOUT, begin to exit the system!";
    this->Finalize();
    exit(0);
  });

  MS_LOG(INFO) << "Worker starts connecting to scheduler and server...";
  worker_node_.Start();
  MS_LOG(INFO) << "Worker connected successfully.";

  running_ = true;
}

void Worker::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const ShapeVector &sizes) {
  if (keys.size() == 0) {
    MS_LOG(EXCEPTION) << "key size should be greater than zero";
  }
  if (key_to_optimId_.count(keys[0]) == 0) {
    MS_LOG(EXCEPTION) << "no optim id found for key " << keys[0];
  }
  Key key = keys[0];
  int64_t optim_id = key_to_optimId_[key];
  MS_LOG(INFO) << "The key is: " << key << " the optim_id: " << optim_id;
  bool is_sparse = false;
  if (optim_id == 1 || optim_id == kSparseLazyAdamIndex || optim_id == kSparseFtrlIndex) {
    is_sparse = true;
  }
  int64_t grad_index = -1;
  int64_t indice_index = -1;

  // Sparse adam gradient
  if (optim_id == 1 || optim_id == kSparseLazyAdamIndex) {
    grad_index = kSparseGradIndex;
    indice_index = kSparseIndiceIndex;

    // Sparse ftrl gradient
  } else if (optim_id == kSparseFtrlIndex) {
    grad_index = 0;
    indice_index = 1;
  }

  size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0, std::plus<int64_t>());
  std::vector<float> total_buffer(total_size, 0);
  size_t offset = 0;
  for (size_t i = 0; i < sizes.size(); i++) {
    void *dst_data = total_buffer.data() + offset / sizeof(float);
    void *src_data = reinterpret_cast<void *>(addrs[i]);
    MS_EXCEPTION_IF_NULL(dst_data);
    MS_EXCEPTION_IF_NULL(src_data);
    size_t size = sizes[i] * sizeof(float);
    size_t dest_size = size;
    size_t src_size = size;
    errno_t ret = memcpy_s(dst_data, dest_size, src_data, src_size);
    if (ret != EOK) {
      MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
      return;
    }
    offset += size;
  }
  MS_LOG(INFO) << "The total size is: " << total_size;

  while (running_ && (!IsReadyForPush(keys[0]))) {
    continue;
  }
  std::vector<int> sizes_int;
  (void)std::transform(sizes.begin(), sizes.end(), std::back_inserter(sizes_int),
                       [](const int64_t &value) { return static_cast<int>(value); });
  if (!is_sparse) {
    PushData(std::vector<Key>(keys), total_buffer, std::vector<int>(sizes_int), kPushCmd);
  } else {
    std::vector<int64_t> &var_shape = key_to_optim_shapes_[key][0];
    int64_t first_dim_size = var_shape[0];
    int64_t outer_dim_size = std::accumulate(var_shape.begin() + 1, var_shape.end(), 1, std::multiplies<int64_t>());
    MS_LOG(DEBUG) << "The keys: " << keys << " the total_buffer: " << total_buffer << " the sizes_int: " << sizes_int
                  << " the grad_index: " << grad_index << " the indice_index: " << indice_index
                  << " the first_dim_size: " << first_dim_size << " the outer_dim_size: " << outer_dim_size;
    PushSparseData(std::vector<Key>(keys), total_buffer, std::vector<int>(sizes_int), LongToSize(grad_index),
                   LongToSize(indice_index), LongToSize(first_dim_size), LongToSize(outer_dim_size));
  }
}

void Worker::Pull(const size_t key, void *dev_addr, const size_t size) {
  MS_EXCEPTION_IF_NULL(dev_addr);
  std::vector<float> variables(size / sizeof(float), 0);
  while (running_ && (!IsReadyForPull(key))) {
    continue;
  }
  PullData({key}, &variables, nullptr, kPullCmd);
  MS_LOG(DEBUG) << "The variables: " << variables << " the size is: " << size;
  size_t dst_size = size;
  size_t src_size = size;
  errno_t ret = memcpy_s(dev_addr, dst_size, variables.data(), src_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
}

size_t Worker::SetParamKey(const std::string &param_name) {
  size_t key = UINT64_MAX;
  if (param_to_key_.count(param_name)) {
    key = param_to_key_[param_name];
    MS_LOG(INFO) << param_name << " key is already set: key value is " << key;
  } else {
    key = key_cnt_++;
    param_to_key_[param_name] = key;
    MS_LOG(INFO) << "Set key " << key << " for parameter " << param_name;
  }
  return key;
}

size_t Worker::GetParamKey(const std::string &param_name) {
  size_t key = kInvalidKey;
  if (param_to_key_.find(param_name) != param_to_key_.end()) {
    key = param_to_key_[param_name];
    MS_LOG(DEBUG) << "Get key of parameter " << param_name << " key is " << key;
  }
  return key;
}

void Worker::SetParamInitInServer(const std::string &param_name, bool init_in_server) {
  MS_LOG(DEBUG) << "Set parameter " << param_name << " init_in_server: " << init_in_server;
  param_to_init_in_server_[param_name] = init_in_server;
}

bool Worker::GetParamInitInServer(const std::string &param_name) {
  if (param_to_init_in_server_.count(param_name) == 0) {
    return false;
  }
  return param_to_init_in_server_[param_name];
}

void Worker::SetKeyOptimId(size_t key, const std::string &optimizer_name) {
  MS_LOG(INFO) << "SetKeyOptimId key is: " << key << " optimizer_name: " << optimizer_name;
  key_to_optimId_[key] = Util::optimizer_id(optimizer_name);
}

void Worker::SetOptimInputShapes(size_t key, const ShapeVector &shape) {
  if (key_to_optim_shapes_.find(key) == key_to_optim_shapes_.end()) {
    key_to_optim_shapes_[key] = {shape};
  } else {
    key_to_optim_shapes_[key].push_back(shape);
  }
}

void Worker::AddEmbeddingTable(const Key &key, const size_t &row_count) {
  bool has_init = IsKeyInit(key);
  if (has_init) {
    return;
  }
  uint64_t begin = 0;
  uint64_t end = 0;
  for (int64_t i = 0; i < server_num_; i++) {
    size_t local_row_cnt = LongToSize(Util::LocalShard(row_count, i, server_num_));
    MS_LOG(DEBUG) << "The row_count: " << row_count << " the local_row_cnt: " << local_row_cnt;
    if (i == 0) {
      end = local_row_cnt - 1;
    } else {
      begin = end + 1;
      end += local_row_cnt;
    }
    EmbeddingTableShardMetadata range(begin, end);
    if (embedding_table_ranges_.count(key) == 0) {
      embedding_table_ranges_[key] = std::make_shared<std::vector<EmbeddingTableShardMetadata>>();
      MS_EXCEPTION_IF_NULL(embedding_table_ranges_[key]);
    }
    embedding_table_ranges_[key]->push_back(range);
  }
  embedding_row_cnt_[key] = row_count;
}

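AddEmbeddingTable above carves an embedding table's rows into one contiguous interval per server. A worked example, under the assumption that Util::LocalShard spreads rows as evenly as possible with the larger shards first (its exact rounding lives in util.cc, which this commit does not show):

// row_count = 10, server_num = 3; assuming ceil-first even shards of 4, 3, 3
// (an assumption about LocalShard), the loop above produces:
const size_t row_count = 10, server_num = 3;
uint64_t begin = 0, end = 0;
for (size_t i = 0; i < server_num; ++i) {
  size_t local = (row_count + server_num - 1 - i) / server_num;  // 4, 3, 3
  if (i == 0) {
    end = local - 1;  // server 0: rows [0, 3]
  } else {
    begin = end + 1;
    end += local;     // server 1: rows [4, 6]; server 2: rows [7, 9]
  }
  // Each interval becomes one EmbeddingTableShardMetadata(begin, end), so a
  // lookup id is routed to the server whose interval contains it.
}
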
bool Worker::InitPSEmbeddingTable(const size_t &key, const std::vector<size_t> &input_shape,
                                  const std::vector<size_t> &indices_shape, const std::vector<size_t> &output_shape,
                                  const ParamInitInfoMessage &info, uint32_t timeout) {
  bool has_init = IsKeyInit(key);
  if (has_init) {
    MS_LOG(DEBUG) << "The embedding table of key " << key << " has already been initialized.";
    return true;
  }

  EmbeddingTableMeta embedding_table_meta;
  embedding_table_meta.set_key(key);
  *embedding_table_meta.mutable_input_shape() = {input_shape.begin(), input_shape.end()};
  *embedding_table_meta.mutable_indices_shape() = {indices_shape.begin(), indices_shape.end()};
  *embedding_table_meta.mutable_output_shape() = {output_shape.begin(), output_shape.end()};
  *embedding_table_meta.mutable_info() = info;

  const std::string &kv_data = embedding_table_meta.SerializeAsString();
  while (!worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, kInitEmbeddingsCmd, timeout)) {
    MS_LOG(INFO) << "Worker broadcast failed, retrying.";
    if (!running_) {
      MS_LOG(ERROR) << "Worker broadcast failed!";
      return false;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
  }

  return true;
}

void Worker::InitPSParamAndOptim(const AnfNodePtr &input_node, const tensor::TensorPtr &tensor) {
  MS_EXCEPTION_IF_NULL(tensor);
  MS_EXCEPTION_IF_NULL(input_node);
  auto pk_node = input_node->cast<ParameterPtr>();
  MS_EXCEPTION_IF_NULL(pk_node);
  const std::string &param_name = pk_node->fullname_with_scope();
  void *param_data = tensor->data_c();
  size_t param_size = LongToSize(tensor->data().nbytes());

  size_t param_key = GetParamKey(param_name);
  if (param_key == kInvalidKey) {
    MS_LOG(DEBUG) << "Parameter " << param_name << " has no key assigned.";
    return;
  }
  bool init_in_server = false;
  auto param_info_ptr = pk_node->param_info();
  if (param_info_ptr != nullptr && param_info_ptr->init_in_server()) {
    init_in_server = true;
  }
  SetParamInitInServer(param_name, init_in_server);
  bool init = IsKeyInit(param_key);
  if (!init) {
    MS_LOG(DEBUG) << "Init parameter key " << param_key << " and optimizer in parameter server side for " << param_name
                  << ", whether init in server: " << init_in_server;
    AddKeyToServerId(param_key);
    if (!PsDataPrefetch::GetInstance().cache_enable()) {
      if (!init_in_server) {
        if (param_size > INT_MAX) {
          MS_LOG(EXCEPTION) << "PS mode max weight size is " << INT_MAX << ", " << param_name << " size is "
                            << param_size;
        }
        InitPSParamData({param_key}, param_data, param_size);
      }
      InitPSOptimId(param_key);
      InitPSOptimInputShapes(param_key);
    }
  }
}

bool Worker::DoPSEmbeddingLookup(const Key &key, const std::vector<int> &lookup_ids, std::vector<float> *lookup_result,
                                 int64_t cmd) {
  MS_EXCEPTION_IF_NULL(lookup_result);
  EmbeddingTableLookup embedding_table_lookup;
  embedding_table_lookup.set_key(key);
  *embedding_table_lookup.mutable_keys() = {lookup_ids.begin(), lookup_ids.end()};

  PartitionEmbeddingMessages messages;
  lookup_partitioner_(embedding_table_lookup, &messages, {});
  std::vector<uint32_t> rank_ids;
  std::vector<std::string> data_strs;
  for (size_t i = 0; i < messages.size(); i++) {
    if (messages.at(i).first) {
      rank_ids.push_back(i);
      data_strs.emplace_back(messages.at(i).second.SerializeAsString());
    }
  }

  std::vector<VectorPtr> resp;
  while (!worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, LongToInt(cmd), &resp)) {
    MS_LOG(INFO) << "Worker send failed, retrying.";
    if (!running_) {
      MS_LOG(ERROR) << "Worker send failed!";
      return false;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
  }

  int64_t single_id_len = SizeToLong(lookup_result->size() / lookup_ids.size());
  mindspore::HashMap<Key, std::shared_ptr<std::pair<float *, int64_t>>> id_addr_map;
  std::shared_ptr<std::vector<float>> values = std::make_shared<std::vector<float>>();
  std::shared_ptr<std::vector<Key>> keys = std::make_shared<std::vector<Key>>();
  int64_t value_offset = 0;
  for (size_t i = 0; i < resp.size(); ++i) {
    KVMessage message;
    CHECK_RETURN_TYPE(message.ParseFromArray(resp.at(i)->data(), resp.at(i)->size()));
    for (auto j = 0; j < message.values_size(); j++) {
      values->push_back(message.values(j));
    }
    for (auto k = 0; k < message.keys_size(); k++) {
      const Key &message_key = message.keys(k);
      keys->push_back(message_key);
    }
  }

  for (size_t i = 0; i < keys->size(); i++) {
    const Key &map_key = keys->at(i);
    float *addr = values->data() + value_offset;
    value_offset += single_id_len;
    id_addr_map[map_key] = std::make_shared<std::pair<float *, int64_t>>(std::make_pair(addr, single_id_len));
  }

  float *result_addr = lookup_result->data();
  MS_EXCEPTION_IF_NULL(result_addr);
  int64_t offset = 0;
  size_t dst_size = 0;
  size_t src_size = 0;
  void *dst_data = nullptr;
  void *src_data = nullptr;
  for (size_t i = 0; i < lookup_ids.size(); i++) {
    if (id_addr_map.count(lookup_ids[i]) == 0) {
      offset += single_id_len;
      continue;
    }
    const Key &id_key = static_cast<Key>(lookup_ids[i]);
    auto &pair = id_addr_map[id_key];
    size_t size = LongToSize(single_id_len * sizeof(float));
    dst_size = size;
    src_size = size;
    dst_data = result_addr + offset;
    src_data = pair->first;
    MS_ERROR_IF_NULL(dst_data);
    MS_ERROR_IF_NULL(src_data);
    errno_t mem_ret = memcpy_s(dst_data, dst_size, src_data, src_size);
    if (mem_ret != EOK) {
      MS_LOG(ERROR) << "memcpy_s error, errno(" << mem_ret << ")";
      return false;
    }
    offset += single_id_len;
  }
  return true;
}

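DoPSEmbeddingLookup above has to reassemble results because each server answers only for the ids in its own shard, in its own order. The core idea, reduced to a self-contained sketch (the buffer names and the std::copy shortcut are illustrative, not the function's actual variables):

// Index returned values by id, then scatter back into the caller's order.
std::vector<int> returned_ids;        // concatenated ids from all server replies
std::vector<float> returned_values;   // concatenated values, single_id_len floats per id
std::vector<int> lookup_ids;          // the caller's original order
const size_t single_id_len = 8;       // illustrative embedding width
std::vector<float> out(lookup_ids.size() * single_id_len, 0.0f);
mindspore::HashMap<int, const float *> by_id;
for (size_t i = 0; i < returned_ids.size(); ++i) {
  by_id[returned_ids[i]] = returned_values.data() + i * single_id_len;
}
for (size_t i = 0; i < lookup_ids.size(); ++i) {
  auto it = by_id.find(lookup_ids[i]);
  if (it != by_id.end()) {
    std::copy(it->second, it->second + single_id_len, out.data() + i * single_id_len);
  }  // ids no server returned keep their zero-initialized slot
}
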
bool Worker::UpdateEmbeddingTable(const std::vector<Key> &keys, const std::vector<int> &lookup_ids,
                                  const std::vector<float> &vals) {
  KVMessage kvs;
  *kvs.mutable_keys() = {keys.begin(), keys.end()};
  *kvs.mutable_len() = {lookup_ids.begin(), lookup_ids.end()};
  *kvs.mutable_values() = {vals.begin(), vals.end()};
  PartitionKVMessages messages;
  update_embedding_partitioner_(kvs, &messages, {});
  std::vector<uint32_t> rank_ids;
  std::vector<std::string> data_strs;
  for (size_t i = 0; i < messages.size(); i++) {
    if (messages.at(i).first) {
      rank_ids.push_back(i);
      data_strs.emplace_back(messages.at(i).second.SerializeAsString());
    }
  }
  while (!worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, LongToInt(kUpdateEmbeddingsCmd))) {
    MS_LOG(INFO) << "Worker send failed, retrying.";
    if (!running_) {
      MS_LOG(ERROR) << "Worker send failed!";
      return false;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
  }

  return true;
}

void Worker::Finalize() {
  if (running_) {
    MS_LOG(INFO) << "Worker starts finalizing...";
    KVMessage kvs;
    kvs.add_keys(0);
    kvs.add_values(0.0f);
    const std::string &kv_data = kvs.SerializeAsString();
    worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, kFinalizeCmd);
    worker_node_.Finish();
    worker_node_.Stop();
    running_ = false;
    MS_LOG(INFO) << "Worker finalized successfully.";
  }
}

void Worker::Initialize() {
  lookup_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    LookupIdPartitioner(send, partition, attrs);
  };
  worker_init_embedding_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    WorkerInitEmbeddingPartitioner(send, partition, attrs);
  };
  round_robin_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    RoundRobinPartitioner(send, partition, attrs);
  };
  sparse_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    SparsePartitioner(send, partition, attrs);
  };
  update_embedding_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    UpdateEmbeddingPartitioner(send, partition, attrs);
  };
  broadcast_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
    BroadcastPartitioner(send, partition, attrs);
  };
}

bool Worker::IsKeyInit(const size_t key) {
  if (init_keys_.find(key) == init_keys_.end() || !init_keys_[key]) {
    return false;
  }
  return true;
}

void Worker::AddKeyToServerId(const Key &key) { AddKeyByHashMod(key); }

void Worker::AddKeyByHashMod(const Key &key) {
  if (server_num_ == 0) {
    MS_LOG(EXCEPTION) << "Server number is invalid: 0";
  }
  key_to_server_id_[key] = static_cast<int64_t>(key % server_num_);
  MS_LOG(DEBUG) << "The server id of key " << key << " is " << key_to_server_id_[key];
}

void Worker::InitPSOptimId(const size_t param_key) {
  MS_LOG(INFO) << "InitPSOptimId key is: " << param_key;
  if (key_to_optimId_.count(param_key) == 0) {
    MS_LOG(EXCEPTION) << "Can't find optimizer id of parameter key " << param_key;
  }
  int64_t optim_id = key_to_optimId_[param_key];

  std::vector<Key> keys = {param_key};
  std::vector<float> optim_id_vals = {static_cast<float>(optim_id)};
  std::vector<int> optim_id_lens = {SizeToInt(optim_id_vals.size())};
  MS_LOG(INFO) << "The keys are: " << keys << " the optim_id_vals are: " << optim_id_vals
               << " the optim_id_lens are: " << optim_id_lens;
  PushData(keys, optim_id_vals, optim_id_lens, kInitWeightToOptimIdCmd);
}

void Worker::InitPSOptimInputShapes(const size_t key) {
  std::vector<Key> keys;
  std::vector<int> shape_len;
  std::vector<float> all_shape;
  std::vector<ShapeVector> shapes = key_to_optim_shapes_[key];
  for (auto shape : shapes) {
    keys.push_back(key);
    if (shape.size() == 0) {
      shape_len.push_back(1);
      all_shape.push_back(1);
    } else {
      shape_len.push_back(SizeToLong(shape.size()));
      std::transform(shape.begin(), shape.end(), std::back_inserter(all_shape),
                     [](size_t dim) -> float { return static_cast<float>(dim); });
    }
  }
  MS_LOG(INFO) << "keys: " << keys;
  MS_LOG(INFO) << "shape_len: " << shape_len;
  MS_LOG(INFO) << "all_shape: " << all_shape;
  if (!init_keys_[key]) {
    init_keys_[key] = true;
  }
  PushData(keys, all_shape, shape_len, kInitOptimInputsShapeCmd);
}

void Worker::InitPSParamData(const std::vector<size_t> &keys, void *const origin_addr, size_t size) {
  MS_EXCEPTION_IF_NULL(origin_addr);
  std::vector<float> addr{reinterpret_cast<float *>(origin_addr),
                          reinterpret_cast<float *>(origin_addr) + size / sizeof(float)};
  std::vector<Key> key(keys);
  std::vector<int> lens;
  lens.push_back(addr.size());
  MS_LOG(INFO) << "the keys are: " << keys;
  MS_LOG(INFO) << "the values are: " << addr;
  PushData(key, addr, lens, kInitWeightsCmd);
  init_keys_[key[0]] = true;
}

bool Worker::IsReadyForPush(const Key &key) {
  std::vector<float> result(1, 0);
  PullData({key}, &result, nullptr, kCheckReadyForPushCmd);
  MS_LOG(INFO) << "key: " << key;
  if (result[0] > 0) {
    MS_LOG(INFO) << "IsReadyForPush: true";
    return true;
  } else {
    MS_LOG(INFO) << "IsReadyForPush: false";
    return false;
  }
}

bool Worker::IsReadyForPull(const Key &key) {
  std::vector<float> result(1, 0);
  PullData({key}, &result, nullptr, kCheckReadyForPullCmd);
  if (result[0] > 0) {
    MS_LOG(INFO) << "IsReadyForPull: true";
    return true;
  } else {
    MS_LOG(INFO) << "IsReadyForPull: false";
    return false;
  }
}

void Worker::PrepareSparseGradient(const size_t, const size_t, const mindspore::HashSet<int> &distinct_ids,
                                   const std::vector<std::pair<int, float *>> &indice_to_grads, const int *all_indice,
                                   const size_t segment_size, float *gradient, int *indices) {
  MS_EXCEPTION_IF_NULL(all_indice);
  MS_EXCEPTION_IF_NULL(gradient);
  MS_EXCEPTION_IF_NULL(indices);
  size_t offset = 0;
  int64_t index = 0;
  size_t segment_data_size = segment_size * sizeof(float);
  size_t dst_size;
  size_t src_size;
  void *dst_data = nullptr;
  void *src_data = nullptr;
  for (auto &pair : indice_to_grads) {
    if (distinct_ids.count(pair.first) == 0) {
      continue;
    }
    indices[index++] = pair.first;

    dst_size = segment_data_size;
    src_size = segment_data_size;
    dst_data = gradient + offset;
    src_data = pair.second;
    MS_EXCEPTION_IF_NULL(dst_data);
    MS_EXCEPTION_IF_NULL(src_data);
    errno_t ret = memcpy_s(gradient + offset, dst_size, pair.second, src_size);
    if (ret != EOK) {
      MS_LOG(ERROR) << "memcpy_s error, errno(" << ret << ")";
      return;
    }
    offset += segment_size;
  }
}

void Worker::BuildSparseValue(const std::vector<int> &lengths, const size_t grad_index, const size_t indice_index,
                              const float *original_data, const float *grads, int *indices,
                              std::vector<float> *reduced_data) {
  MS_EXCEPTION_IF_NULL(original_data);
  MS_EXCEPTION_IF_NULL(grads);
  MS_EXCEPTION_IF_NULL(indices);
  MS_EXCEPTION_IF_NULL(reduced_data);
  int64_t offset = 0;
  size_t dst_size = 0;
  size_t src_size = 0;
  void *dst_data = nullptr;
  void *src_data = nullptr;
  for (size_t i = 0; i < lengths.size(); i++) {
    if (i != grad_index && i != indice_index) {
      size_t data_size = lengths[i] * sizeof(float);
      dst_size = data_size;
      src_size = data_size;
      dst_data = reduced_data->data() + offset;
      src_data = const_cast<float *>(original_data) + offset;
      MS_EXCEPTION_IF_NULL(dst_data);
      MS_EXCEPTION_IF_NULL(src_data);
      errno_t mem_ret = memcpy_s(dst_data, dst_size, src_data, src_size);
      if (mem_ret != EOK) {
        MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << mem_ret << ")";
        return;
      }
    }
    offset += lengths[i];
  }

  // Fill the reduced gradient
  int64_t grad_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += lengths[i];
  }
  size_t data_size = lengths[grad_index] * sizeof(float);
  dst_size = data_size;
  src_size = data_size;
  dst_data = reduced_data->data() + grad_offset;
  src_data = const_cast<float *>(grads);
  MS_EXCEPTION_IF_NULL(dst_data);
  errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }

  // Fill the reduced indice
  int64_t indice_offset = grad_offset + lengths[grad_index];
  data_size = lengths[indice_index] * sizeof(float);
  float *indice_data = reduced_data->data() + indice_offset;
  dst_size = data_size;
  src_size = data_size;
  dst_data = indice_data;
  src_data = indices;
  MS_EXCEPTION_IF_NULL(dst_data);
  MS_EXCEPTION_IF_NULL(src_data);
  ret = memcpy_s(dst_data, dst_size, src_data, src_size);
  if (ret != EOK) {
    MS_LOG(EXCEPTION) << "memcpy_s error, errno(" << ret << ")";
    return;
  }
}

void Worker::PushData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
                      int cmd, int64_t) {
  KVMessage kvs;
  *kvs.mutable_keys() = {keys.begin(), keys.end()};
  *kvs.mutable_values() = {vals.begin(), vals.end()};
  *kvs.mutable_len() = {lens.begin(), lens.end()};
  MS_LOG(INFO) << "The embedding table range count of the first key is: " << embedding_table_ranges_.count(keys[0]);
  if (embedding_table_ranges_.count(keys[0])) {
    if (cmd == kInitWeightsCmd) {
      SendForPush(cmd, kvs, worker_init_embedding_partitioner_, {});
    } else {
      const std::string &kv_data = kvs.SerializeAsString();
      worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, cmd);
    }
  } else {
    SendForPush(cmd, kvs, round_robin_partitioner_, {});
  }
}

void Worker::PushSparseData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
                            size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size) {
  KVMessage kvs;
  *kvs.mutable_keys() = {keys.begin(), keys.end()};
  *kvs.mutable_values() = {vals.begin(), vals.end()};
  *kvs.mutable_len() = {lens.begin(), lens.end()};
  if (embedding_table_ranges_.count(keys[0])) {
    std::map<int64_t, int64_t> attrs{{0, grad_index}, {1, indice_index}, {2, first_dim_size}, {3, outer_dim_size}};
    SendForPush(kPushCmd, kvs, sparse_partitioner_, attrs);
  } else {
    SendForPush(kPushCmd, kvs, round_robin_partitioner_, {});
  }
}

void Worker::PullData(const std::vector<Key> &keys, std::vector<float> *const vals, std::vector<int> *lens, int cmd,
                      int64_t priority) {
  MS_EXCEPTION_IF_NULL(vals);
  KVMessage kvs;
  *kvs.mutable_keys() = {keys.begin(), keys.end()};
  if (embedding_table_ranges_.count(keys[0])) {
    SendForPull(cmd, kvs, broadcast_partitioner_, {}, vals, lens);
  } else {
    SendForPull(cmd, kvs, round_robin_partitioner_, {}, vals, lens);
  }
}

void Worker::LookupIdPartitioner(const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition,
                                 const std::map<int64_t, int64_t> &) {
  MS_EXCEPTION_IF_NULL(partition);

  const Key &key = send.key();
  const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
  partition->resize(ranges.size());

  for (size_t i = 0; i < ranges.size(); i++) {
    const EmbeddingTableShardMetadata &range = ranges[i];
    const auto &begin = range.begin();
    const auto &end = range.end();
    mindspore::HashSet<int32_t> unique_ids;
    auto &kvs = partition->at(i).second;

    kvs.set_key(key);

    std::for_each(send.keys().begin(), send.keys().end(), [&](int32_t lookup_id) {
      if (lookup_id >= SizeToInt(begin) && lookup_id <= SizeToInt(end)) {
        unique_ids.insert(lookup_id);
      }
    });
    MS_LOG(DEBUG) << "The unique ids size is: " << unique_ids.size();

    for (const auto &lookup_id : unique_ids) {
      kvs.add_keys(lookup_id);
      kvs.add_values(0.0f);
    }

    if (kvs.keys().empty()) {
      partition->at(i).first = false;
    } else {
      partition->at(i).first = true;
    }
  }
}

void Worker::SparsePartitioner(const KVMessage &send, PartitionKVMessages *partition,
|
||||
const std::map<int64_t, int64_t> &attrs) {
|
||||
MS_EXCEPTION_IF_NULL(partition);
|
||||
// Init variables
|
||||
float *data = const_cast<float *>(send.values().data());
|
||||
|
||||
  if (attrs.count(kGradIndex) == 0 || attrs.count(kIndiceIndex) == 0 || attrs.count(kFirstDimSize) == 0 ||
      attrs.count(kOutDimSize) == 0) {
    MS_LOG(EXCEPTION) << "Invalid attrs keys";
  }
  auto iter = attrs.find(kGradIndex);
  size_t grad_index = static_cast<size_t>(iter->second);
  iter = attrs.find(kIndiceIndex);
  size_t indice_index = static_cast<size_t>(iter->second);
  iter = attrs.find(kFirstDimSize);
  size_t first_dim_size = static_cast<size_t>(iter->second);
  iter = attrs.find(kOutDimSize);
  size_t outer_dim_size = static_cast<size_t>(iter->second);

  size_t grad_size = send.len()[SizeToInt(grad_index)];
  size_t indice_size = send.len()[SizeToInt(indice_index)];
  size_t segment_size = grad_size / indice_size;

  size_t grad_offset = 0;
  size_t indice_offset = 0;
  for (size_t i = 0; i < grad_index; i++) {
    grad_offset += send.len()[i];
  }
  for (size_t j = 0; j < indice_index; j++) {
    indice_offset += send.len()[j];
  }

  float *grad_data = data + grad_offset;
  void *indice_data_temp = data + indice_offset;
  int *indice_data = reinterpret_cast<int *>(indice_data_temp);

  // Build the mapping from each indice to its gradient segment.
  std::vector<std::pair<int, float *>> indice_to_grads;
  for (size_t i = 0; i < indice_size; i++) {
    int indice = indice_data[i];
    float *grad = grad_data + i * segment_size;
    indice_to_grads.push_back(std::make_pair(indice, grad));
  }

  const Key &key = send.keys()[0];
  const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
  partition->resize(ranges.size());

  // Construct the reduced sparse data for each server.
  for (size_t i = 0; i < ranges.size(); i++) {
    const EmbeddingTableShardMetadata &range = ranges[i];
    const auto &begin = range.begin();
    const auto &end = range.end();
    auto &kvs = partition->at(i).second;
    *kvs.mutable_keys() = {send.keys().begin(), send.keys().end()};
    *kvs.mutable_len() = {send.len().begin(), send.len().end()};

    // Prepare the sparse gradient and indices for this shard.
    std::vector<int> indice_ids;
    mindspore::HashSet<int> distinct_ids;
    for (size_t j = 0; j < indice_size; j++) {
      size_t indice = static_cast<size_t>(indice_data[j]);
      if (indice >= begin && indice <= end) {
        indice_ids.push_back(indice);
        distinct_ids.insert(indice);
      }
    }
    size_t indices_size = indice_ids.size();
    if (indices_size > 0) {
      size_t partition_segment_size = indices_size * segment_size;
      std::vector<float> src_grad_data(partition_segment_size);
      std::vector<int> src_indice_data(indices_size);
      PrepareSparseGradient(begin, end, distinct_ids, indice_to_grads, indice_data, segment_size,
                            src_grad_data.data(), src_indice_data.data());

      // Reduce the sparse gradient and indices.
      std::vector<float> new_grad(partition_segment_size);
      std::vector<int> new_indices(indices_size);
      mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});
      Util::ReduceSparseGradient(src_grad_data.data(), src_indice_data.data(), indices_size, segment_size,
                                 first_dim_size, outer_dim_size, &unique_sparse_grad);

      // Update the lengths of the reduced sparse gradient and indices.
      std::vector<int> reduced_lens = {kvs.len().begin(), kvs.len().end()};
      reduced_lens[grad_index] = unique_sparse_grad.indices_size_ * segment_size;
      reduced_lens[indice_index] = unique_sparse_grad.indices_size_;

      // Build the sparse value to be sent.
      size_t total_size = std::accumulate(reduced_lens.begin(), reduced_lens.end(), 0, std::plus<int>());
      std::vector<float> reduced_data(total_size, 0);
      BuildSparseValue(reduced_lens, grad_index, indice_index, data, unique_sparse_grad.value_,
                       unique_sparse_grad.indices_, &reduced_data);

      *kvs.mutable_len() = {reduced_lens.begin(), reduced_lens.end()};
      *kvs.mutable_values() = {reduced_data.begin(), reduced_data.end()};
    }

    if (indices_size == 0) {
      std::vector<float> no_keys;
      std::vector<float> no_vals;
      std::vector<float> no_lens;
      no_keys.push_back(key);
      no_vals.push_back(kGradValue);
      *kvs.mutable_values() = {no_vals.begin(), no_vals.end()};
      *kvs.mutable_len() = {no_lens.begin(), no_lens.end()};
    }
    partition->at(i).first = true;
  }
}
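// --- Illustrative sketch (added for exposition, not part of the original file) ---
// The reduction performed above by Util::ReduceSparseGradient merges duplicate
// embedding indices by summing the gradient rows that share an index. The
// self-contained ReduceByIndex below is a made-up, minimal version of that idea.
#include <cstddef>
#include <map>
#include <utility>
#include <vector>

std::pair<std::vector<int>, std::vector<float>> ReduceByIndex(const std::vector<int> &indices,
                                                              const std::vector<float> &grad, size_t segment_size) {
  std::map<int, std::vector<float>> acc;  // index -> accumulated gradient row
  for (size_t i = 0; i < indices.size(); ++i) {
    auto &row = acc[indices[i]];
    row.resize(segment_size, 0.0f);
    for (size_t k = 0; k < segment_size; ++k) {
      row[k] += grad[i * segment_size + k];
    }
  }
  std::vector<int> out_indices;
  std::vector<float> out_grad;
  for (const auto &kv : acc) {  // emit unique indices in sorted order
    out_indices.push_back(kv.first);
    out_grad.insert(out_grad.end(), kv.second.begin(), kv.second.end());
  }
  return {out_indices, out_grad};
}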
void Worker::RoundRobinPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                                   const std::map<int64_t, int64_t> &) {
  MS_EXCEPTION_IF_NULL(partition);
  partition->resize(LongToSize(server_num_));
  auto keys = send.keys();
  auto values = send.values();
  auto lens = send.len();
  MS_LOG(INFO) << "the key size is:" << send.keys_size() << " the values size is:" << send.values_size()
               << " the lens:" << send.len_size();

  size_t len;
  Key param_key;
  for (int i = 0; i < send.keys_size(); i++) {
    param_key = keys[i];
    int64_t server_id = key_to_server_id_[param_key];
    if (!partition->at(LongToUlong(server_id)).first) {
      partition->at(LongToUlong(server_id)).first = true;
    }

    KVMessage &server_kv_pairs = partition->at(LongToUlong(server_id)).second;
    server_kv_pairs.add_keys(param_key);
    if (values.empty()) {
      continue;
    }
    len = lens[i];
    int64_t offset = std::accumulate(lens.begin(), lens.begin() + i, 0);
    auto val_begin = values.begin() + offset;
    auto val_end = val_begin + len;
    for (auto it = val_begin; it != val_end; ++it) {
      server_kv_pairs.add_values(*it);
    }
    server_kv_pairs.add_len(len);
  }
}
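// --- Illustrative sketch (added for exposition, not part of the original file) ---
// key_to_server_id_ is filled elsewhere (AddKeyToServerId, not shown in this
// diff). A plausible round-robin assignment, sketched with the made-up helper
// AssignRoundRobin, would map the i-th registered key to server i % server_num:
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

std::unordered_map<uint64_t, int64_t> AssignRoundRobin(const std::vector<uint64_t> &keys, int64_t server_num) {
  std::unordered_map<uint64_t, int64_t> key_to_server;
  for (size_t i = 0; i < keys.size(); ++i) {
    key_to_server[keys[i]] = static_cast<int64_t>(i % static_cast<size_t>(server_num));
  }
  return key_to_server;
}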
void Worker::WorkerInitEmbeddingPartitioner(const KVMessage &send, std::vector<std::pair<bool, KVMessage>> *partition,
                                            const std::map<int64_t, int64_t> &) {
  MS_EXCEPTION_IF_NULL(partition);
  partition->resize(LongToSize(server_num_));
  auto keys = send.keys();
  auto values = send.values();
  auto lens = send.len();

  int32_t col_cnt = lens[0] / embedding_row_cnt_[keys[0]];
  const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[keys[0]]);
  for (size_t i = 0; i < ranges.size(); i++) {
    size_t offset_begin = ranges[i].begin() * col_cnt;
    size_t offset_end = (ranges[i].end() + 1) * col_cnt;
    KVMessage kvs;
    *kvs.mutable_keys() = keys;
    *kvs.mutable_values() = {values.begin() + offset_begin, values.begin() + offset_end};
    kvs.add_len(offset_end - offset_begin);
    partition->at(i).first = true;
    partition->at(i).second = kvs;
  }
}
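// Note on the slicing above: each shard owns an inclusive row range
// [begin, end] of the embedding table, so with col_cnt values per row,
// shard i receives the flat value slice [begin * col_cnt, (end + 1) * col_cnt).
// For example, a 10-row table with four columns split into row ranges [0, 4]
// and [5, 9] yields two slices of 20 floats each.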
void Worker::UpdateEmbeddingPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                                        const std::map<int64_t, int64_t> &) {
  MS_EXCEPTION_IF_NULL(partition);
  const float *embedding_vals = send.values().data();
  const uint64_t *lookup_ids = send.len().data();
  size_t val_size = IntToSize(send.values_size());
  size_t id_size = IntToSize(send.len_size());
  if (id_size == 0) {
    MS_LOG(EXCEPTION) << "The id size is 0.";
    return;
  }
  size_t embedding_dim = val_size / id_size;

  const Key &key = send.keys()[0];
  const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
  partition->resize(ranges.size());

  for (size_t i = 0; i < ranges.size(); i++) {
    const EmbeddingTableShardMetadata &range = ranges[i];
    const auto &begin = range.begin();
    const auto &end = range.end();
    auto &kvs = partition->at(i).second;
    kvs.add_keys(key);
    for (size_t j = 0; j < id_size; j++) {
      auto lookup_id = lookup_ids[j];
      if (lookup_id >= begin && lookup_id <= end) {
        kvs.add_keys(lookup_id);
        for (size_t k = 0; k < embedding_dim; k++) {
          kvs.add_values(embedding_vals[j * embedding_dim + k]);
        }
      }
    }

    if (kvs.keys_size() <= 1) {
      partition->at(i).first = false;
    } else {
      partition->at(i).first = true;
    }
  }
}
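// Design note on the partition flag above: a shard whose range matches no
// lookup id ends up holding only the leading table key, so the
// kvs.keys_size() <= 1 test marks that partition inactive and the send to
// that server is skipped.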
void Worker::BroadcastPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                                  const std::map<int64_t, int64_t> &) {
  MS_EXCEPTION_IF_NULL(partition);
  partition->resize(LongToSize(server_num_));
  for (size_t i = 0; i < LongToSize(server_num_); i++) {
    partition->at(i).first = true;
    partition->at(i).second = send;
  }
}
void Worker::SendForPush(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
                         const std::map<int64_t, int64_t> &attrs) {
  PartitionKVMessages messages;
  partitioner(send, &messages, attrs);
  std::vector<uint32_t> rank_ids;
  std::vector<std::string> data_strs;
  for (size_t i = 0; i < messages.size(); i++) {
    if (messages.at(i).first) {
      rank_ids.push_back(i);
      data_strs.emplace_back(messages.at(i).second.SerializeAsString());
    }
  }
  worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, cmd);
}
void Worker::SendForPull(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
                         const std::map<int64_t, int64_t> &, std::vector<float> *vals, std::vector<int> *lens) {
  MS_EXCEPTION_IF_NULL(vals);
  PartitionKVMessages messages;
  partitioner(send, &messages, {});
  std::vector<uint32_t> rank_ids;
  std::vector<std::string> data_strs;
  for (size_t i = 0; i < messages.size(); i++) {
    if (messages.at(i).first) {
      rank_ids.push_back(i);
      data_strs.emplace_back(messages.at(i).second.SerializeAsString());
    }
  }
  std::vector<VectorPtr> resp;
  worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, cmd, &resp);
  vals->clear();
  for (size_t i = 0; i < resp.size(); ++i) {
    KVMessage message;
    CHECK_RETURN_TYPE(message.ParseFromArray(resp.at(i)->data(), SizeToInt(resp.at(i)->size())));
    std::copy(message.values().begin(), message.values().end(), std::back_inserter(*vals));

    if (lens) {
      lens->clear();
      std::copy(message.len().begin(), message.len().end(), std::back_inserter(*lens));
    }
  }
}
}  // namespace ps
}  // namespace mindspore
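All of the partitioners above share the KVPartitioner signature, which is what lets SendForPush and SendForPull stay agnostic about how a message is split across servers. A simplified sketch of that dispatch pattern follows; Msg, Partition, Partitioner, and SendPartitioned are stand-ins invented for this example, not types from the codebase.

#include <cstdint>
#include <functional>
#include <map>
#include <utility>
#include <vector>

// Stand-ins for KVMessage and PartitionKVMessages, for illustration only.
struct Msg {
  std::vector<uint64_t> keys;
  std::vector<float> values;
};
using Partition = std::vector<std::pair<bool, Msg>>;
using Partitioner = std::function<void(const Msg &, Partition *, const std::map<int64_t, int64_t> &)>;

// Only the shards a partitioner marked active are serialized and sent.
void SendPartitioned(const Msg &send, const Partitioner &partitioner) {
  Partition parts;
  partitioner(send, &parts, {});
  for (size_t rank = 0; rank < parts.size(); ++rank) {
    if (parts[rank].first) {
      // ... serialize parts[rank].second and send it to server `rank`
    }
  }
}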
@ -1,151 +0,0 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PS_WORKER_H_
#define MINDSPORE_CCSRC_PS_WORKER_H_

#include <utility>
#include <memory>
#include <vector>
#include <string>
#include <numeric>
#include <functional>
#include <algorithm>
#include <map>
#include <mutex>

#include "utils/hash_map.h"
#include "utils/hash_set.h"
#include "utils/log_adapter.h"
#include "ir/tensor.h"
#include "ps/util.h"
#include "ps/constants.h"
#include "utils/shape_utils.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/core/ps_worker_node.h"
#include "ps/embedding_table_shard_metadata.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/ps_context.h"
#include "include/backend/visible.h"

namespace mindspore {
namespace ps {
class BACKEND_EXPORT Worker {
 public:
  static Worker &GetInstance();
  using Callback = std::function<void()>;
  using PartitionEmbeddingMessages = std::vector<std::pair<bool, EmbeddingTableLookup>>;
  using PartitionKVMessages = std::vector<std::pair<bool, KVMessage>>;

  using EmbeddingPartitioner = std::function<void(
    const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition, const std::map<int64_t, int64_t> &attrs)>;
  using KVPartitioner =
    std::function<void(const KVMessage &send, PartitionKVMessages *partition, const std::map<int64_t, int64_t> &attrs)>;

  void Run();
  void Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const ShapeVector &sizes);
  void Pull(const size_t key, void *dev_addr, const size_t size);
  size_t SetParamKey(const std::string &param_name);
  size_t GetParamKey(const std::string &param_name);
  void SetParamInitInServer(const std::string &param_name, bool init_in_server);
  bool GetParamInitInServer(const std::string &param_name);
  void SetKeyOptimId(size_t key, const std::string &optimizer_name);
  void SetOptimInputShapes(size_t key, const ShapeVector &shape);
  void AddEmbeddingTable(const Key &key, const size_t &row_count);
  bool InitPSEmbeddingTable(const size_t &key, const std::vector<size_t> &input_shape,
                            const std::vector<size_t> &indices_shape, const std::vector<size_t> &output_shape,
                            const ParamInitInfoMessage &info, uint32_t timeout = core::kCommTimeoutInSeconds);
  void InitPSParamAndOptim(const AnfNodePtr &input_node, const tensor::TensorPtr &tensor);
  bool DoPSEmbeddingLookup(const Key &key, const std::vector<int> &lookup_ids, std::vector<float> *lookup_result,
                           int64_t cmd);
  bool UpdateEmbeddingTable(const std::vector<Key> &keys, const std::vector<int> &lookup_ids,
                            const std::vector<float> &vals);

  bool running() const { return running_; }
  void Finalize();

 private:
  Worker() : server_num_(-1), running_(false), key_cnt_(0) {}
  ~Worker() = default;
  Worker(const Worker &) = delete;
  Worker &operator=(const Worker &) = delete;

  void Initialize();
  bool IsKeyInit(const size_t key);
  void AddKeyToServerId(const Key &key);
  void AddKeyByHashMod(const Key &key);
  void InitPSOptimId(const size_t param_key);
  void InitPSOptimInputShapes(const size_t key);
  void InitPSParamData(const std::vector<size_t> &keys, void *const origin_addr, size_t size);
  bool IsReadyForPush(const Key &key);
  bool IsReadyForPull(const Key &key);
  void PrepareSparseGradient(const size_t begin, const size_t end, const mindspore::HashSet<int> &distinct_ids,
                             const std::vector<std::pair<int, float *>> &indice_to_grads, const int *all_indice,
                             const size_t segment_size, float *gradient, int *indices);
  void BuildSparseValue(const std::vector<int> &lengths, const size_t grad_index, const size_t indice_index,
                        const float *original_data, const float *grads, int *indices,
                        std::vector<float> *reduced_data);

  void PushData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens = {},
                int command = 0, int64_t priority = 0);
  void PushSparseData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
                      size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size);
  void PullData(const std::vector<Key> &keys, std::vector<float> *const vals, std::vector<int> *lens = nullptr,
                int cmd = 0, int64_t priority = 0);

  void LookupIdPartitioner(const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition,
                           const std::map<int64_t, int64_t> &attrs);

  void SparsePartitioner(const KVMessage &send, PartitionKVMessages *partition,
                         const std::map<int64_t, int64_t> &attrs);
  void RoundRobinPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                             const std::map<int64_t, int64_t> &attrs);
  void WorkerInitEmbeddingPartitioner(const KVMessage &send, std::vector<std::pair<bool, KVMessage>> *partition,
                                      const std::map<int64_t, int64_t> &attrs);
  void UpdateEmbeddingPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                                  const std::map<int64_t, int64_t> &attrs);
  void BroadcastPartitioner(const KVMessage &send, PartitionKVMessages *partition,
                            const std::map<int64_t, int64_t> &attrs);
  void SendForPush(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
                   const std::map<int64_t, int64_t> &attrs);
  void SendForPull(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
                   const std::map<int64_t, int64_t> &attrs, std::vector<float> *vals, std::vector<int> *lens);

  int64_t server_num_;
  bool running_;
  std::mutex running_mutex_;
  size_t key_cnt_;
  std::map<std::string, size_t> param_to_key_;
  std::map<size_t, bool> init_keys_;
  std::map<size_t, int64_t> key_to_optimId_;
  std::map<size_t, std::vector<ShapeVector>> key_to_optim_shapes_;
  std::map<std::string, bool> param_to_init_in_server_;
  core::PSWorkerNode worker_node_;

  EmbeddingPartitioner lookup_partitioner_;
  KVPartitioner sparse_partitioner_;
  KVPartitioner round_robin_partitioner_;
  KVPartitioner worker_init_embedding_partitioner_;
  KVPartitioner update_embedding_partitioner_;
  KVPartitioner broadcast_partitioner_;
  mindspore::HashMap<Key, int64_t> key_to_server_id_;
  mindspore::HashMap<Key, size_t> embedding_row_cnt_;

  mindspore::HashMap<Key, std::shared_ptr<std::vector<EmbeddingTableShardMetadata>>> embedding_table_ranges_;
};
}  // namespace ps
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PS_WORKER_H_
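For context on the header above: a worker-side call sequence against this (now removed) API would have looked roughly like the sketch below. The parameter name, lookup ids, and command value are placeholders, not values taken from the codebase.

#include <vector>

// Hypothetical usage of the removed ps::Worker singleton; illustrative only.
void ExampleWorkerFlow() {
  auto &worker = mindspore::ps::Worker::GetInstance();
  worker.Run();  // connect this worker to the scheduler and servers
  size_t key = worker.SetParamKey("embedding_table.param");  // placeholder name
  std::vector<float> lookup_result;
  (void)worker.DoPSEmbeddingLookup(key, {0, 1, 2}, &lookup_result, 0);
  worker.Finalize();
}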
@ -38,9 +38,6 @@
#include "include/common/utils/utils.h"
#include "include/common/utils/parallel_context.h"
#include "include/common/debug/env_config_parser.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "kernel/common_utils.h"

using mindspore::kernel::Address;
@ -644,9 +641,6 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
    }
    add_need_alloc_nodes(input_node);
  }
#ifdef WITH_BACKEND
  bool ps_cache_check = false;
#endif
  std::map<AnfNodePtr, AnfNodePtr> shadow_backend_node_map;
  GetShadowBackendNodeMap(graph, &shadow_backend_node_map);
  for (auto &item : need_alloc_nodes) {
@ -660,25 +654,6 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
      continue;
    }
    DeviceAddressPtr device_address = GetInternalDeviceAddress(graph, item);
#ifdef WITH_BACKEND
    const std::string &param_name = item->fullname_with_scope();
    if (ps::ps_cache_instance.IsHashTable(param_name) && !ps::PSContext::instance()->enable_distributed_mindrt()) {
      MS_LOG(INFO) << "Parameter(" << param_name << ")"
                   << " enables the embeddingLookup cache in parameter server training mode.";
      // PS embeddingLookup cache check.
      if (!ps_cache_check) {
        CheckIfSupportPSEmbeddingCache(graph);
        ps_cache_check = true;
      }
      const auto &address = ps::ps_cache_instance.QueryHashTableAddr(param_name);
      MS_EXCEPTION_IF_NULL(address.addr);
      device_address = CreateDeviceAddress(address.addr, address.size, AnfAlgo::GetOutputFormat(item, index),
                                           output_type_id, {item, index});
      device_address->set_host_shape(trans::GetRuntimePaddingShape(item, index));
      AnfAlgo::SetOutputAddr(device_address, index, item.get());
      continue;
    }
#endif
    GetDeviceAddress(item, shadow_backend_node_map, index, graph.graph_id(), &device_address);
    AnfAlgo::SetOutputAddr(device_address, index, item.get());
  }
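The removed block above is where the embedding cache intersected static memory assignment: a parameter backed by the cache reused the device memory already owned by ps::ps_cache_instance (via QueryHashTableAddr) instead of receiving a fresh allocation, so the check and early continue had to run before the normal GetDeviceAddress path.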
@ -1859,147 +1834,5 @@ bool KernelRuntime::LaunchKernels(const session::KernelGraph &graph) {
void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
  MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource";
}

#ifdef WITH_BACKEND
namespace {
// Finalize the ps cache module before throwing an exception.
void FinalizePsCache(const std::string &exception) {
  ps::ps_cache_instance.Finalize();
  MS_LOG(EXCEPTION) << exception;
}
}  // namespace

void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph &graph,
                                             AnfNodePtr *const first_cache_input_index,
                                             size_t *const first_cache_size) const {
  for (const auto &kernel : graph.execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel);
    auto kernel_name = common::AnfAlgo::GetCNodeName(kernel);
    if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
      continue;
    }
    auto input_param = common::AnfAlgo::GetPrevNodeOutput(kernel, 0, true);
    auto input_index = common::AnfAlgo::GetPrevNodeOutput(kernel, 1, true);
    MS_EXCEPTION_IF_NULL(input_param.first);
    MS_EXCEPTION_IF_NULL(input_index.first);
    auto param_name = input_param.first->fullname_with_scope();
    if (!ps::ps_cache_instance.IsHashTable(param_name)) {
      continue;
    }
    auto size = ps::ps_cache_instance.QueryHashTableSize(param_name);
    while (input_index.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(input_index.first) == kCastOpName)) {
      input_index = common::AnfAlgo::GetPrevNodeOutput(input_index.first, 0, true);
      MS_EXCEPTION_IF_NULL(input_index.first);
    }
    auto cnode = common::AnfAlgo::IsGraphKernel(input_index.first)
                   ? common::AnfAlgo::GetOutputOfGraphkernel(input_index)
                   : input_index.first;
    MS_EXCEPTION_IF_NULL(cnode);
    if (!cnode->isa<CNode>()) {
      FinalizePsCache("The input index of the embeddingLookup should be a CNode but got " +
                      cnode->fullname_with_scope());
    }
    auto input_index_node_name = common::AnfAlgo::GetCNodeName(cnode);
    if (input_index_node_name != kGetNextOpName) {
      bool full_batch = parallel::ParallelContext::GetInstance()->full_batch();
      if ((!full_batch && (input_index_node_name != kUniqueOpName)) ||
          (full_batch && (input_index_node_name != kMinimumOpName))) {
        MS_LOG(ERROR) << "The input index of the embeddingLookup(" << kernel->fullname_with_scope()
                      << ") cache is from " << cnode->fullname_with_scope();
        FinalizePsCache(
          "The embeddingLookup whose input index isn't from dataset doesn't support cache in parameter server "
          "training mode.");
      }
    }
    *first_cache_input_index = cnode;
    *first_cache_size = size;
    MS_LOG(INFO) << "The input index of the first embeddingLookup cache is from " << cnode->fullname_with_scope()
                 << ", the cache size is " << size;
    return;
  }
}

void KernelRuntime::CheckSparsePSEmbeddingCache(const CNodePtr &node) const {
  MS_EXCEPTION_IF_NULL(node);
  auto pre_node = common::AnfAlgo::GetPrevNodeOutput(node, 1, true);
  MS_EXCEPTION_IF_NULL(pre_node.first);
  while (pre_node.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(pre_node.first) != kUniqueOpName)) {
    pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
    MS_EXCEPTION_IF_NULL(pre_node.first);
  }
  if (!(pre_node.first->isa<CNode>()) || (common::AnfAlgo::GetCNodeName(pre_node.first) != kUniqueOpName)) {
    FinalizePsCache("The input_indices of kernel[SparseGatherV2] must be unique in parameter server cache mode");
  }

  pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
  MS_EXCEPTION_IF_NULL(pre_node.first);
  while (pre_node.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(pre_node.first) == kCastOpName)) {
    pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
    MS_EXCEPTION_IF_NULL(pre_node.first);
  }
  if (!(pre_node.first->isa<CNode>()) || (common::AnfAlgo::GetCNodeName(pre_node.first) != kGetNextOpName)) {
    FinalizePsCache(
      "The input indices of kernel[Unique] must be produced from dataset directly and the indices value can not be "
      "changed before delivering to kernel[Unique] in parameter server cache mode.");
  }
}

void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph) {
  AnfNodePtr first_cache_input_index = nullptr;
  size_t first_cache_size = 0;
  GetFirstPSEmbeddingCache(graph, &first_cache_input_index, &first_cache_size);
  MS_EXCEPTION_IF_NULL(first_cache_input_index);
  for (const auto &kernel : graph.execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel);
    auto kernel_name = common::AnfAlgo::GetCNodeName(kernel);
    if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
      continue;
    }
    auto input_param = common::AnfAlgo::GetPrevNodeOutput(kernel, 0, true);
    auto input_index = common::AnfAlgo::GetPrevNodeOutput(kernel, 1, true);
    MS_EXCEPTION_IF_NULL(input_param.first);
    MS_EXCEPTION_IF_NULL(input_index.first);
    if (!input_param.first->isa<Parameter>()) {
      continue;
    }
    auto param_name = input_param.first->fullname_with_scope();
    if (ps::ps_cache_instance.IsHashTable(param_name) && (kernel_name == kSparseGatherV2OpName)) {
      CheckSparsePSEmbeddingCache(kernel);
    }
    while (input_index.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(input_index.first) == kCastOpName)) {
      input_index = common::AnfAlgo::GetPrevNodeOutput(input_index.first, 0, true);
      MS_EXCEPTION_IF_NULL(input_index.first);
    }
    auto cnode = common::AnfAlgo::IsGraphKernel(input_index.first)
                   ? common::AnfAlgo::GetOutputOfGraphkernel(input_index)
                   : input_index.first;
    MS_EXCEPTION_IF_NULL(cnode);
    if (cnode == first_cache_input_index) {
      if (!ps::ps_cache_instance.IsHashTable(param_name)) {
        MS_LOG(ERROR) << "The embeddingLookup(" << kernel->fullname_with_scope() << ") doesn't enable cache.";
        FinalizePsCache(
          "All the embeddingLookups whose input indices are from dataset must enable cache at the same time when one "
          "of them enables cache in parameter server training mode.");
      }
      auto size = ps::ps_cache_instance.QueryHashTableSize(param_name);
      if (size != first_cache_size) {
        MS_LOG(ERROR) << "The cache size(" << size << ") of embeddingLookup(" << kernel->fullname_with_scope()
                      << ") is not the same as other embeddingLookup cache size(" << first_cache_size << ").";
        FinalizePsCache("The cache sizes of embeddingLookups are not the same in parameter server training mode.");
      }
    } else if (ps::ps_cache_instance.IsHashTable(param_name)) {
      MS_LOG(ERROR) << "The input index of the embeddingLookup(" << kernel->fullname_with_scope() << ") cache is from "
                    << cnode->fullname_with_scope();
      FinalizePsCache(
        "The embeddingLookup whose input index isn't from dataset doesn't support cache in parameter server training "
        "mode.");
    } else if (cnode->isa<CNode>() && (common::AnfAlgo::GetCNodeName(cnode) == kGetNextOpName)) {
      MS_LOG(ERROR) << "The EmbeddingLookup kernel(" << kernel->fullname_with_scope() << ") doesn't enable cache.";
      FinalizePsCache(
        "All EmbeddingLookup kernels whose input indices are from dataset must enable cache at the same time.");
    }
  }
}
#endif
}  // namespace device
}  // namespace mindspore
@ -16,18 +16,10 @@

#include "runtime/device/kernel_runtime_manager.h"
#include "utils/log_adapter.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif

namespace mindspore {
namespace device {
void KernelRuntimeManager::ClearRuntimeResource() {
#ifdef WITH_BACKEND
  if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
    ps::ps_cache_instance.SyncEmbeddingTable();
  }
#endif
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &iter : runtime_map_) {
    MS_LOG(INFO) << "Release device " << iter.first;

@ -128,11 +120,6 @@ void KernelRuntimeManager::ReleaseKernelRuntime(const std::string &device_name,
  if (runtime == nullptr) {
    return;
  }
#ifdef WITH_BACKEND
  if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
    ps::ps_cache_instance.SyncEmbeddingTable();
  }
#endif
  runtime->ReleaseDeviceRes();
  runtime_map_.erase(runtime_iter);
}
@ -21,9 +21,6 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/comm_manager.h"
#include "include/common/utils/scoped_long_running.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "ps/ps_cache/ps_cache_manager.h"
#endif

namespace mindspore {
namespace session {
@ -21,9 +21,6 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/comm_manager.h"
#include "include/common/utils/scoped_long_running.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "ps/ps_cache/ps_cache_manager.h"
#endif

namespace mindspore {
namespace session {
@ -226,13 +226,6 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST
    "../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/util.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/scheduler.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info_builder.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/worker.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/parameter_server.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/fl/server/kernel/sgd_kernel.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/fl/server/kernel/apply_momentum_kernel.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST
@ -1,38 +0,0 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "common/common_test.h"
#include "ps/embedding_table_shard_metadata.h"

namespace mindspore {
namespace ps {
class TestEmbeddingTableShardMetadata : public UT::Common {
 public:
  TestEmbeddingTableShardMetadata() = default;
  virtual ~TestEmbeddingTableShardMetadata() = default;

  void SetUp() override {}
  void TearDown() override {}
};

TEST_F(TestEmbeddingTableShardMetadata, EmbeddingTable) {
  EmbeddingTableShardMetadata embedding_table_shard(1, 100);
  EXPECT_EQ(embedding_table_shard.begin(), 1);
  EXPECT_EQ(embedding_table_shard.end(), 100);
  EXPECT_EQ(embedding_table_shard.size(), 99);
}
}  // namespace ps
}  // namespace mindspore
@ -1,59 +0,0 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/util.h"
#include "ps/worker.h"
#include "ps/scheduler.h"
#include "ps/parameter_server.h"

namespace mindspore {
namespace ps {
PsCacheManager &PsCacheManager::GetInstance() {
  static PsCacheManager instance{};
  return instance;
}

void PsCacheManager::Finalize() {}
int PsCacheManager::cache_indices_lower_bound() const { return 1; }

bool Util::IsRoleOfPServer() { return true; }
bool Util::IsRoleOfScheduler() { return true; }
bool Util::FuseServerCommOps(const pipeline::ResourcePtr &res) { return true; }

Worker &Worker::GetInstance() {
  static Worker instance{};
  return instance;
}

void Worker::Run() {}
void Worker::Finalize() {}

ParameterServer &ParameterServer::GetInstance() {
  static ParameterServer instance{};
  return instance;
}

void ParameterServer::Run(const FuncGraphPtr &func_graph) {}

Scheduler &Scheduler::GetInstance() {
  static Scheduler instance{};
  return instance;
}

void Scheduler::Run() {}
}  // namespace ps
}  // namespace mindspore