Purge old ps arch

ZPaC 2022-08-24 15:33:40 +08:00
parent c3ec281ec2
commit 3bb04bb3f3
63 changed files with 12 additions and 7683 deletions

View File

@@ -545,7 +545,6 @@ if(ENABLE_TEST OR ENABLE_TESTCASES)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ge/ge_operator_stub.cc)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/transform/util.cc)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/pipeline/action_stub.cc)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_stub.cc)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/cluster/cluster_stub.cc)
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/profiling/parallel_strategy_profiling_stub.cc)
@@ -562,7 +561,6 @@ endif()
if(NOT ENABLE_TESTCASES AND NOT (ENABLE_D OR ENABLE_CPU OR ENABLE_GPU))
include_directories(${CMAKE_BINARY_DIR})
list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/cluster/cluster_stub.cc)
list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_stub.cc)
list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/fl/fl_stub.cc)
list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/fl/server_stub.cc)
list(APPEND EXPRESSION_STUB_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ps/ps_core_stub.cc)

View File

@@ -48,7 +48,7 @@
#include "include/common/utils/parallel_context.h"
#include "kernel/oplib/oplib.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/ps_context.h"
@@ -546,16 +546,6 @@ void GetNodeUsedList(const FuncGraphPtr &kernel_graph, const AnfNodePtr &node,
}
}
}
// Check whether the Parameter initialized in server is used by the operator executed on the device side.
bool UseParamInitInServer(const FuncGraphPtr &kernel_graph, const AnfNodePtr &param_node) {
std::vector<AnfNodePtr> node_users_list;
GetNodeUsedList(kernel_graph, param_node, &node_users_list);
// Check if there is a real CNode among all users of the node.
return std::any_of(node_users_list.begin(), node_users_list.end(),
[](const AnfNodePtr &node) { return AnfUtils::IsRealKernel(node); });
}
#endif
void IterateFindTensor(std::vector<ValuePtr> *msTensors, const VectorRef &ref_list) {
@@ -3062,131 +3052,6 @@ void SessionBasic::DumpGraphs(const std::vector<KernelGraphPtr> &graphs) const {
}
void SessionBasic::UnifyMindIR(const KernelGraphPtr &graph) { opt::CommonUnifyMindIR(graph); }
#ifdef WITH_BACKEND
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) const {
if (!ps::PSContext::instance()->is_worker()) {
return;
}
// Check whether the Parameter initialized in server is used by the operator executed on the device side.
CheckPSModeConsistence(kernel_graph);
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
if (!ps::ps_cache_instance.initialized_ps_cache()) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(device_target, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
auto context = runtime_instance->context();
const auto &kernels = kernel_graph->execution_order();
if (kernels.size() > 0 && common::AnfAlgo::GetCNodeName(kernels[0]) == "InitDataSetQueue") {
GetBatchElements(kernels[0]);
ps::ps_cache_instance.Initialize();
}
ps::ps_cache_instance.DoProcessData(device_id_, context);
}
} else {
// Assign parameter keys.
AssignParamKey(kernel_graph);
}
}
void SessionBasic::GetBatchElements(const AnfNodePtr &kernel_node) const {
auto shapes = common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "shapes");
auto types = common::AnfAlgo::GetNodeAttr<std::vector<TypePtr>>(kernel_node, "types");
if (shapes.size() != types.size() || shapes.size() == 0 || types.size() == 0) {
MS_LOG(EXCEPTION) << "Invalid shapes of op[InitDataSetQueue]: shapes size " << shapes.size() << ", types size "
<< types.size();
}
size_t batch_elements = 1;
const auto &shape = shapes[0];
for (size_t i = 0; i < shape.size(); ++i) {
batch_elements *= LongToSize(shape[i]);
}
ps::ps_cache_instance.set_batch_elements(batch_elements);
}
void SessionBasic::CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const {
auto input_nodes = kernel_graph->inputs();
for (const auto &input_node : input_nodes) {
if (!input_node->isa<Parameter>()) {
continue;
}
auto pk_node = input_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(pk_node);
auto param_info_ptr = pk_node->param_info();
const std::string &param_name = pk_node->fullname_with_scope();
// If the Parameter is initialized on the server and its users include a real CNode which executes on the device
// side, an error is reported; such a Parameter may only be used by side-effect operators.
if (param_info_ptr != nullptr && param_info_ptr->init_in_server() &&
UseParamInitInServer(kernel_graph, input_node) && !ps::ps_cache_instance.IsHashTable(param_name)) {
MS_LOG(EXCEPTION) << "Can not initialize the parameter[" << param_name
<< "] in server, this parameter is used by kernel which executes in device";
}
}
}
void SessionBasic::AssignParamKey(const KernelGraphPtr &kernel_graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
// PS embeddingLookup cache check.
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
MS_LOG(EXCEPTION) << "The other parameter can't set ps mode when the embeddingLookup cache is enabled in "
"parameter server training mode.";
}
std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph->get_return());
for (auto &node : node_list) {
if (node != nullptr && node->isa<CNode>()) {
// Assign key for forward kernel EmbeddingLookup.
// The key will be assigned to the embedding table and the Push kernel as well.
if (common::AnfAlgo::GetCNodeName(node) == kEmbeddingLookupOpName) {
size_t embedding_table_idx = 0;
auto embedding_table = common::AnfAlgo::GetInputNode(node->cast<CNodePtr>(), embedding_table_idx);
size_t key = ps::Worker::GetInstance().SetParamKey(embedding_table->fullname_with_scope());
common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), node);
} else if (common::AnfAlgo::GetCNodeName(node) == kPushOpName) {
auto pull_node = FindPullNode(node, node_list);
if (!pull_node) {
MS_LOG(EXCEPTION) << "Assigning parameter key failed: can't find Pull node of the Push node.";
}
// Second input of Pull node is the trainable parameter.
size_t parameter_index = 1;
auto parameter_node = common::AnfAlgo::GetInputNode(pull_node->cast<CNodePtr>(), parameter_index);
size_t key = ps::Worker::GetInstance().SetParamKey(parameter_node->fullname_with_scope());
common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), node);
common::AnfAlgo::SetNodeAttr(kAttrPsKey, MakeValue(key), pull_node);
std::string optimizer_name = common::AnfAlgo::GetNodeAttr<std::string>(node, kAttrOptimizerType);
ps::Worker::GetInstance().SetKeyOptimId(key, optimizer_name);
}
}
}
}
void SessionBasic::InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const {
if (!ps::PSContext::instance()->is_worker()) {
return;
}
std::vector<tensor::TensorPtr> inputs(inputs_const);
MS_EXCEPTION_IF_NULL(kernel_graph);
auto input_nodes = kernel_graph->inputs();
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
for (size_t i = 0; i < inputs.size(); ++i) {
auto tensor = inputs[i];
MS_EXCEPTION_IF_NULL(tensor);
auto input_node = input_nodes[i];
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
ps::Worker::GetInstance().InitPSParamAndOptim(input_node, tensor);
}
}
}
#endif
} // namespace session
void DumpGraphExeOrder(const std::string &file_name, const std::string &target_dir,
const std::vector<CNodePtr> &execution_order) {

View File

@@ -137,9 +137,6 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
// get graph id in child graphs by ME front anf node pointer
virtual GraphId GetGraphIdByNode(const AnfNodePtr &) const;
virtual GraphId GetFinalRunGraph() const { return kInvalidGraphId; }
void AssignParamKey(const KernelGraphPtr &kernel_graph) const;
void InitPSParamAndOptim(const KernelGraphPtr &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const;
bool IsGetNextGraph(const std::shared_ptr<KernelGraph> &kernel_graph, std::string *channel_name) const;
virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs,
std::string *error_msg) const {
@@ -345,11 +342,6 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
virtual std::string GetCommWorldGroup() { return std::string(); }
void DumpGraphs(const std::vector<KernelGraphPtr> &graphs) const;
void GetConstValueDepend(const CNodePtr &cnode, std::vector<size_t> *const_input_attr_index) const;
#ifdef WITH_BACKEND
void CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const;
void GetBatchElements(const AnfNodePtr &kernel_node) const;
void InitPsWorker(const KernelGraphPtr &kernel_graph) const;
#endif
// TODO(caifubi): refactor and remove bucket.
std::map<uint32_t, std::vector<std::shared_ptr<device::Bucket>>> bucket_map_;
std::map<uint32_t, uint32_t> free_bucket_id_map_;

View File

@@ -156,6 +156,9 @@ class BACKEND_EXPORT EmbeddingCacheTableManager {
// automatic parallel scenario.
int cache_indices_lower_bound() const;
// Get vocab cache size on device.
size_t vocab_cache_size() const { return device_cache_size_; }
void DumpHashTables() const;
private:
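Almost everything in this commit is a deletion; the visible addition in this hunk is the inline vocab_cache_size() accessor, which exposes the device cache size directly from EmbeddingCacheTableManager so callers no longer go through the removed ps::PsCacheManager. A minimal, hypothetical call site (assuming the global embedding_cache_table_manager instance used elsewhere in this diff):

// Hypothetical caller: query the device vocab cache size via the new accessor.
auto slice_size = SizeToLong(embedding_cache_table_manager.vocab_cache_size());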

View File

@@ -27,7 +27,7 @@
#include "frontend/parallel/graph_util/generate_graph.h"
#include "include/common/utils/parallel_context.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "utils/ms_context.h"
#include "ps/ps_context.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
@@ -760,8 +760,6 @@ Status GatherInfo::InferBias() {
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
if (ps::PSContext::instance()->enable_distributed_mindrt()) {
bias_ = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
} else {
bias_ = static_cast<int64_t>(ps::PsCacheManager::GetInstance().cache_indices_lower_bound());
}
return SUCCESS;
}
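With the legacy ps::PsCacheManager branch deleted, the cache-enabled path in GatherInfo::InferBias keeps only the distributed-mindrt lookup. A sketch of the resulting code, assuming the context lines elided above are unchanged:

if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
  // Only the distributed mindrt path survives this commit.
  if (ps::PSContext::instance()->enable_distributed_mindrt()) {
    bias_ = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
  }
  return SUCCESS;
}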

View File

@@ -30,7 +30,8 @@
#include "include/common/utils/parallel_context.h"
#include "frontend/parallel/tensor_layout/tensor_redistribution.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_context.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
#endif
@@ -112,10 +113,8 @@ Status UniqueInfo::ComputeReplaceGraph(const CNodePtr &cnode) {
int64_t bias = 0;
if (ps::PSContext::instance()->enable_distributed_mindrt()) {
bias = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
} else {
bias = static_cast<int64_t>(ps::PsCacheManager::GetInstance().cache_indices_lower_bound());
}
auto slice_size = SizeToLong(ps::PsCacheManager::GetInstance().vocab_cache_size());
auto slice_size = SizeToLong(embedding_cache_table_manager.vocab_cache_size());
auto sub = gen_g.PushBack({gen_g.NewOpInst(SUB), gen_g.virtual_input_node(), CreateInt32Tensor(bias)});
auto relu = gen_g.PushBack({gen_g.NewOpInst(RELU), sub});
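The same substitution happens here: both the bias and the slice size now come from embedding_cache_table_manager instead of ps::PsCacheManager. A sketch of the resulting computation in UniqueInfo::ComputeReplaceGraph, under the same assumption about the elided context:

int64_t bias = 0;
if (ps::PSContext::instance()->enable_distributed_mindrt()) {
  bias = static_cast<int64_t>(embedding_cache_table_manager.cache_indices_lower_bound());
}
// vocab_cache_size() is the accessor added to EmbeddingCacheTableManager above.
auto slice_size = SizeToLong(embedding_cache_table_manager.vocab_cache_size());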

View File

@@ -53,9 +53,7 @@
#include "load_mindir/infer_mindir.h"
#include "debug/data_dump/dump_json_parser.h"
#ifdef WITH_BACKEND
#include "ps/parameter_server.h"
#include "ps/scheduler.h"
#include "ps/worker.h"
#include "fl/worker/fl_worker.h"
#include "fl/server/server.h"
#include "distributed/cluster/cluster_context.h"
@@ -1169,32 +1167,11 @@ bool ExecuteAction(const ResourcePtr &resource) {
}
#ifdef WITH_BACKEND
bool StartPSWorkerAction(const ResourcePtr &) {
ps::Worker::GetInstance().Run();
return true;
}
bool StartFLWorkerAction(const ResourcePtr &) {
fl::worker::FLWorker::GetInstance().Run();
return true;
}
bool StartPSServerAction(const ResourcePtr &resource) {
if (distributed::cluster::ClusterContext::instance()->initialized()) {
MS_LOG(INFO) << "This node is server. Start wait for finalizing.";
if (!distributed::cluster::ClusterContext::instance()->Finalize(UINT32_MAX)) {
MS_LOG(ERROR) << "Failed to finalize server.";
return false;
}
MS_LOG(INFO) << "Server is successfully finalized.";
return true;
}
MS_EXCEPTION_IF_NULL(resource);
FuncGraphPtr func_graph = resource->func_graph();
auto &ps = ps::ParameterServer::GetInstance();
ps.Run(func_graph);
return true;
}
bool StartServerAction(const ResourcePtr &resource) {
MS_EXCEPTION_IF_NULL(resource);
FuncGraphPtr func_graph = resource->func_graph();
@@ -1578,8 +1555,6 @@ std::vector<ActionItem> VmPipeline(const ResourcePtr &resource) {
std::string server_mode = ps::PSContext::instance()->server_mode();
if (server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid) {
(void)actions.emplace_back(std::make_pair("worker", StartFLWorkerAction));
} else {
(void)actions.emplace_back(std::make_pair("worker", StartPSWorkerAction));
}
}
}
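With StartPSWorkerAction removed, VmPipeline registers a worker action only for the federated-learning server modes. A sketch of the surviving registration, assuming the enclosing conditions are unchanged:

std::string server_mode = ps::PSContext::instance()->server_mode();
if (server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid) {
  // Plain PS workers no longer get a dedicated pipeline action.
  (void)actions.emplace_back(std::make_pair("worker", StartFLWorkerAction));
}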
@@ -1623,18 +1598,6 @@ std::vector<ActionItem> ServerPipeline(const ResourcePtr &resource) {
return actions;
}
std::vector<ActionItem> PServerPipeline(const ResourcePtr &resource) {
if (resource->EnableCompileCache() && resource->func_graph() != nullptr) {
return {std::make_pair("pserver", StartPSServerAction)};
}
auto actions = CommonPipeline();
(void)actions.emplace_back(std::make_pair("optimize", VmOptimizeAction));
(void)actions.emplace_back(std::make_pair("auto_monad_reorder", OrderEnforceAction));
(void)actions.emplace_back(std::make_pair("validate", ValidateAction));
(void)actions.emplace_back(std::make_pair("pserver", StartPSServerAction));
return actions;
}
std::vector<ActionItem> PSchedulerPipeline(const ResourcePtr &resource) {
if (resource->EnableCompileCache() && resource->func_graph() != nullptr) {
return {std::make_pair("scheduler", StartPSSchedulerAction)};

View File

@@ -40,9 +40,7 @@ bool VmOptimizeAction(const ResourcePtr &resource);
bool PynativeElimOpt(const ResourcePtr &resource);
bool TaskEmitAction(const ResourcePtr &resource);
bool ExecuteAction(const ResourcePtr &resource);
bool StartPSWorkerAction(const ResourcePtr &resource);
bool StartFLWorkerAction(const ResourcePtr &resource);
bool StartPSServerAction(const ResourcePtr &resource);
bool StartPSSchedulerAction(const ResourcePtr &resource);
// This action is for federated learning only. In a later version, parameter server mode and federated learning will
// use the same action.

View File

@@ -69,9 +69,7 @@
#ifdef WITH_BACKEND
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/worker.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "fl/server/server.h"
#include "fl/worker/fl_worker.h"
#include "distributed/cluster/cluster_context.h"
@@ -771,10 +769,6 @@ std::vector<ActionItem> GetPipeline(const ResourcePtr &resource, const std::stri
ps::PSContext::instance()->is_server()) {
return ServerPipeline(resource);
}
if (ps::PSContext::instance()->is_server()) {
resource->SetBackendAsync([]() { return compile::CreateBackend(); });
return PServerPipeline(resource);
}
if (ps::PSContext::instance()->is_scheduler()) {
return PSchedulerPipeline(resource);
}
@@ -1737,15 +1731,10 @@ void ClearResAtexit() {
#ifdef WITH_BACKEND
if (!distributed::cluster::ClusterContext::instance()->initialized() && ps::PSContext::instance()->is_ps_mode() &&
ps::PSContext::instance()->is_worker()) {
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.Finalize();
}
MS_LOG(INFO) << "Start finalizing worker.";
const std::string &server_mode = ps::PSContext::instance()->server_mode();
if ((server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid)) {
fl::worker::FLWorker::GetInstance().Finalize();
} else {
ps::Worker::GetInstance().Finalize();
}
}
#endif
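Worker teardown in ClearResAtexit is simplified the same way: the PS cache finalization and the ps::Worker branch are gone, leaving only the federated-learning worker. A sketch of the resulting block, assuming the surrounding guards are unchanged:

if (!distributed::cluster::ClusterContext::instance()->initialized() && ps::PSContext::instance()->is_ps_mode() &&
    ps::PSContext::instance()->is_worker()) {
  MS_LOG(INFO) << "Start finalizing worker.";
  const std::string &server_mode = ps::PSContext::instance()->server_mode();
  if (server_mode == ps::kServerModeFL || server_mode == ps::kServerModeHybrid) {
    // Only FL/hybrid workers need explicit finalization after this commit.
    fl::worker::FLWorker::GetInstance().Finalize();
  }
}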

View File

@@ -1,310 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/hal/device/ps/ascend_ps_cache.h"
#include <google/protobuf/text_format.h>
#include <string>
#include <vector>
#include <memory>
#include "ps/ps_cache/ps_cache_factory.h"
#include "plugin/device/ascend/hal/device/ascend_memory_pool.h"
#include "plugin/device/ascend/kernel/aicpu/aicpu_kernel_mod.h"
#include "utils/ms_context.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "runtime/rt.h"
#include "acl/acl_rt.h"
using mindspore::kernel::Address;
using AddressPtr = std::shared_ptr<Address>;
using AddressPtrList = std::vector<AddressPtr>;
namespace mindspore {
namespace ps {
namespace ascend {
MS_REG_PS_CACHE(kAscendDevice, AscendPsCache);
namespace {
bool SetProtoInputs(const std::vector<std::vector<size_t>> &data_shape, const std::vector<TypeId> &data_type,
mindspore::NodeDef *proto) {
MS_ERROR_IF_NULL(proto);
if (data_shape.size() != data_type.size()) {
MS_LOG(ERROR) << "The size of data shape is not equal to the size of data type.";
return false;
}
for (size_t input_index = 0; input_index < data_shape.size(); input_index++) {
::mindspore::Tensor *proto_inputs = proto->add_inputs();
MS_ERROR_IF_NULL(proto_inputs);
auto input_shape = data_shape[input_index];
mindspore::TensorShape *tensorShape = proto_inputs->mutable_tensor_shape();
MS_ERROR_IF_NULL(tensorShape);
for (auto item : input_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
MS_ERROR_IF_NULL(dim);
dim->set_size((::google::protobuf::int64)item);
}
auto input_type = kernel::AicpuOpUtil::MsTypeToProtoType(data_type[input_index]);
proto_inputs->set_tensor_type(input_type);
proto_inputs->set_mem_device("HBM");
}
return true;
}
bool SetProtoOutputs(const std::vector<std::vector<size_t>> &data_shape, const std::vector<TypeId> &data_type,
mindspore::NodeDef *proto) {
MS_ERROR_IF_NULL(proto);
if (data_shape.size() != data_type.size()) {
MS_LOG(ERROR) << "The size of data shape is not equal to the size of data type.";
return false;
}
for (size_t output_index = 0; output_index < data_shape.size(); output_index++) {
::mindspore::Tensor *proto_outputs = proto->add_outputs();
MS_ERROR_IF_NULL(proto_outputs);
auto output_shape = data_shape[output_index];
mindspore::TensorShape *tensorShape = proto_outputs->mutable_tensor_shape();
MS_ERROR_IF_NULL(tensorShape);
for (auto item : output_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
MS_ERROR_IF_NULL(dim);
dim->set_size((::google::protobuf::int64)item);
}
auto output_type = kernel::AicpuOpUtil::MsTypeToProtoType(data_type[output_index]);
proto_outputs->set_tensor_type(output_type);
proto_outputs->set_mem_device("HBM");
}
return true;
}
bool SetNodedefProto(const std::shared_ptr<KernelNodeInfo> &op_info,
const std::shared_ptr<kernel::AicpuOpKernelMod> &kernel_mod_ptr) {
MS_ERROR_IF_NULL(op_info);
MS_ERROR_IF_NULL(kernel_mod_ptr);
mindspore::NodeDef proto;
proto.set_op(op_info->op_name_);
RETURN_IF_FALSE(SetProtoInputs(op_info->input_data_shape_, op_info->input_data_type_, &proto));
RETURN_IF_FALSE(SetProtoOutputs(op_info->output_data_shape_, op_info->output_data_type_, &proto));
std::string nodeDefStr;
if (!proto.SerializeToString(&nodeDefStr)) {
MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
return false;
}
MS_LOG(DEBUG) << "Set node def proto, node name:" << op_info->op_name_;
kernel_mod_ptr->SetNodeDef(nodeDefStr);
return true;
}
} // namespace
bool AscendPsCache::InitDevice(uint32_t device_id, const void *context) {
MS_ERROR_IF_NULL(context);
auto ret = rtSetDevice(UintToInt(device_id));
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call rtSetDevice, ret[" << ret << "]";
return false;
}
auto rt_context = const_cast<rtContext_t>(context);
ret = rtCtxSetCurrent(rt_context);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call rtCtxSetCurrent, ret[" << ret << "]";
return false;
}
ret = rtStreamCreate(&stream_, 0);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call rtStreamCreate, ret[" << ret << "]";
return false;
}
return true;
}
void *AscendPsCache::MallocMemory(size_t size) {
const auto device_addr = device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(size);
if (device_addr == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size;
}
return device_addr;
}
void AscendPsCache::FreeMemory(void *device_addr) {
device::ascend::AscendMemoryPool::GetInstance().FreeTensorMem(device_addr);
}
bool AscendPsCache::MallocConstantMemory(size_t cache_vocab_size) {
offset_addr_ = reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
if (offset_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
}
rtMemset(offset_addr_, sizeof(int), 0, sizeof(int));
cache_vocab_size_addr_ =
reinterpret_cast<int *>(device::ascend::AscendMemoryPool::GetInstance().AllocTensorMem(sizeof(int)));
if (cache_vocab_size_addr_ == nullptr) {
MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << sizeof(int);
}
int copy_value = SizeToInt(cache_vocab_size);
if (!CopyHostMemToDevice(cache_vocab_size_addr_, &copy_value, sizeof(int))) {
return false;
}
return SynchronizeStream();
}
bool AscendPsCache::RecordEvent() {
event_.reset(new rtEvent_t());
MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
auto ret = rtEventCreate(&(*event_));
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Create event failed";
return false;
}
ret = rtEventRecord(*event_, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Record event failed";
return false;
}
return true;
}
bool AscendPsCache::SynchronizeEvent() {
MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
auto ret = rtEventSynchronize(*event_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "tEventSynchronize failed";
return false;
}
ret = rtEventDestroy(*event_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtEventDestroy failed";
return false;
}
return true;
}
bool AscendPsCache::SynchronizeStream() {
MS_ERROR_IF_NULL_W_RET_VAL(stream_, false);
auto ret = rtStreamSynchronize(stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtStreamSynchronize failed";
return false;
}
return true;
}
bool AscendPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_HOST_TO_DEVICE, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
return false;
}
return true;
}
bool AscendPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
auto ret = aclrtMemcpyAsync(dst, size, src, size, ACL_MEMCPY_DEVICE_TO_HOST, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "aclrtMemcpyAsync failed, the error num is:" << ret;
return false;
}
return true;
}
bool AscendPsCache::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) {
MS_ERROR_IF_NULL(hash_table_addr);
MS_ERROR_IF_NULL(swap_out_value_addr);
MS_ERROR_IF_NULL(swap_out_index_addr);
auto hash_swap_out_mod = std::make_shared<kernel::AicpuOpKernelMod>();
MS_ERROR_IF_NULL(hash_swap_out_mod);
hash_swap_out_mod->SetNodeName(kEmbeddingLookupOpName);
std::vector<size_t> hash_table_shape = {cache_vocab_size, embedding_size};
std::vector<size_t> swap_out_index_shape = {swap_out_size};
std::vector<size_t> offset_shape = {1};
std::vector<std::vector<size_t>> input_shape = {hash_table_shape, swap_out_index_shape, offset_shape};
std::vector<size_t> swap_out_value_shape = {swap_out_size, embedding_size};
std::vector<std::vector<size_t>> output_shape = {swap_out_value_shape};
std::vector<TypeId> input_type = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32};
std::vector<TypeId> output_type = {TypeId::kNumberTypeFloat32};
auto op_info =
std::make_shared<KernelNodeInfo>(kEmbeddingLookupOpName, input_shape, input_type, output_shape, output_type);
MS_ERROR_IF_NULL_W_RET_VAL(op_info, false);
RETURN_IF_FALSE(SetNodedefProto(op_info, hash_swap_out_mod));
AddressPtrList kernel_inputs;
AddressPtrList kernel_outputs = {
std::make_shared<Address>(swap_out_value_addr, swap_out_size * embedding_size * sizeof(float))};
AddressPtrList kernel_workspaces;
(void)kernel_inputs.emplace_back(
std::make_shared<Address>(hash_table_addr, cache_vocab_size * embedding_size * sizeof(float)));
(void)kernel_inputs.emplace_back(std::make_shared<Address>(swap_out_index_addr, swap_out_size * sizeof(int)));
(void)kernel_inputs.emplace_back(std::make_shared<Address>(offset_addr_, sizeof(int)));
auto ret = hash_swap_out_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
if (!ret) {
MS_LOG(ERROR) << "Hash swap out launch failed.";
return false;
}
return true;
}
bool AscendPsCache::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) {
MS_ERROR_IF_NULL(hash_table_addr);
MS_ERROR_IF_NULL(swap_in_value_addr);
MS_ERROR_IF_NULL(swap_in_index_addr);
auto hash_swap_in_mod = std::make_shared<kernel::AicpuOpKernelMod>();
MS_ERROR_IF_NULL(hash_swap_in_mod);
hash_swap_in_mod->SetNodeName(kernel::kUpdateCache);
std::vector<size_t> hash_table_shape = {cache_vocab_size, embedding_size};
std::vector<size_t> swap_in_index_shape = {swap_in_size};
std::vector<size_t> swap_in_value_shape = {swap_in_size, embedding_size};
std::vector<size_t> offset_shape = {1};
std::vector<std::vector<size_t>> input_shape = {hash_table_shape, swap_in_index_shape, swap_in_value_shape,
offset_shape};
std::vector<std::vector<size_t>> output_shape = {offset_shape};
std::vector<TypeId> input_type = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt32, TypeId::kNumberTypeFloat32,
TypeId::kNumberTypeInt32};
std::vector<TypeId> output_type = {TypeId::kNumberTypeInt32};
auto op_info =
std::make_shared<KernelNodeInfo>(kernel::kUpdateCache, input_shape, input_type, output_shape, output_type);
MS_ERROR_IF_NULL_W_RET_VAL(op_info, false);
SetNodedefProto(op_info, hash_swap_in_mod);
AddressPtrList kernel_inputs;
AddressPtrList kernel_outputs;
AddressPtrList kernel_workspaces;
(void)kernel_inputs.emplace_back(
std::make_shared<Address>(hash_table_addr, cache_vocab_size * embedding_size * sizeof(float)));
(void)kernel_inputs.emplace_back(std::make_shared<Address>(swap_in_index_addr, swap_in_size * sizeof(int)));
(void)kernel_inputs.emplace_back(
std::make_shared<Address>(swap_in_value_addr, swap_in_size * embedding_size * sizeof(float)));
(void)kernel_inputs.emplace_back(std::make_shared<Address>(cache_vocab_size_addr_, sizeof(int)));
// The output of updateCache kernel is required but not useful, so any address can be assigned.
(void)kernel_outputs.emplace_back(std::make_shared<Address>(offset_addr_, sizeof(int)));
auto ret = hash_swap_in_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
if (!ret) {
MS_LOG(ERROR) << "Hash swap in launch failed.";
return false;
}
return true;
}
} // namespace ascend
} // namespace ps
} // namespace mindspore

View File

@@ -1,75 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "ps/ps_cache/ps_cache_basic.h"
#include "plugin/device/ascend/kernel/aicpu/aicpu_kernel_mod.h"
#include "ir/dtype.h"
#include "runtime/base.h"
namespace mindspore {
namespace ps {
namespace ascend {
struct KernelNodeInfo {
KernelNodeInfo(const std::string &op_name, std::vector<std::vector<size_t>> input_data_shape,
std::vector<TypeId> input_data_type, std::vector<std::vector<size_t>> output_data_shape,
std::vector<TypeId> output_data_type)
: op_name_(op_name) {
input_data_shape_.swap(input_data_shape);
input_data_type_.swap(input_data_type);
output_data_shape_.swap(output_data_shape);
output_data_type_.swap(output_data_type);
}
std::string op_name_;
std::vector<std::vector<size_t>> input_data_shape_;
std::vector<TypeId> input_data_type_;
std::vector<std::vector<size_t>> output_data_shape_;
std::vector<TypeId> output_data_type_;
};
class AscendPsCache : public PsCacheBasic {
public:
AscendPsCache() = default;
~AscendPsCache() override = default;
bool InitDevice(uint32_t device_id, const void *context) override;
void *MallocMemory(size_t size) override;
void FreeMemory(void *device_addr) override;
bool MallocConstantMemory(size_t cache_vocab_size) override;
bool RecordEvent() override;
bool SynchronizeEvent() override;
bool SynchronizeStream() override;
bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_out_size) override;
bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_in_size) override;
private:
int *offset_addr_{nullptr};
int *cache_vocab_size_addr_{nullptr};
std::unique_ptr<rtEvent_t> event_;
};
} // namespace ascend
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_

View File

@@ -72,7 +72,6 @@
#endif
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "plugin/device/ascend/hal/device/ascend_bucket.h"
#include "plugin/device/ascend/hal/device/ascend_device_address.h"
@@ -249,12 +248,6 @@ bool TensorNeedSync(const std::shared_ptr<KernelGraph> &kernel_graph, const AnfN
}
MS_EXCEPTION_IF_NULL(memcpy_nums);
(*memcpy_nums)++;
#ifdef WITH_BACKEND
const std::string &param_name = parameter->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
return false;
}
#endif
auto input_param = parameter->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(input_param);
if (common::AnfAlgo::IsParameterWeight(input_param) || kernel_graph->IsUpdatedParameter(input_param)) {
@@ -347,12 +340,6 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_gra
}
if (AnfAlgo::OutputAddrExist(input_node, 0) &&
TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
#ifdef WITH_BACKEND
const std::string &param_name = input_node->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
continue;
}
#endif
auto device_address = AnfAlgo::GetMutableOutputAddr(input_node, 0);
MS_EXCEPTION_IF_NULL(device_address);
if (size != 0 &&
@@ -460,9 +447,6 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
// adjust kernel
AdjustKernel(root_graph);
#ifdef WITH_BACKEND
InitPsWorker(root_graph);
#endif
// assign stream
AssignStream(NOT_NULL(root_graph));
#ifndef ENABLE_SECURITY
@@ -539,9 +523,6 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
single_graph->UpdateExecuteKernelStreamLabel();
// adjust execution order because merge child graph and other special operations
AdjustKernel(graph);
#ifdef WITH_BACKEND
InitPsWorker(graph);
#endif
// Assign streams for control sink and hccl and so on
AssignStream(NOT_NULL(graph));
#ifndef ENABLE_SECURITY
@@ -617,14 +598,6 @@ void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_g
debugger_->PreExecute(kernel_graph);
}
#endif
#ifdef WITH_BACKEND
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
std::string channel_name;
if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
ps::ps_cache_instance.IncreaseGraphStep(channel_name);
}
#endif
}
// Ascend old runtime.

View File

@@ -94,7 +94,6 @@ void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode && ps::PSContext::instance()->is_ps_mode()) {
AssignParamKey(kernel_graph);
if (ps::PSContext::instance()->is_worker()) {
std::string pass_name = "replace_node_by_proxy";
pass_name.append(std::to_string(graph_sum_));
@@ -197,10 +196,6 @@ void CPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {
MS_LOG(INFO) << "Bind input output address";
runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);
#ifdef WITH_BACKEND
InitPSParamAndOptim(kernel_graph, inputs);
#endif
}
void CPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,

View File

@@ -1,34 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/apply_momentum_ps_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
bool ApplyMomentumPSKernelMod::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
return Launch(inputs, workspace, outputs);
}
const std::vector<size_t> &ApplyMomentumPSKernelMod::input_sizes() const { return GetInputSizeList(); }
const std::vector<size_t> &ApplyMomentumPSKernelMod::output_sizes() const { return GetOutputSizeList(); }
const std::vector<size_t> &ApplyMomentumPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@@ -1,45 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/apply_momentum_cpu_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
class ApplyMomentumPSKernelMod : public ApplyMomentumCpuKernelMod, public PServerKernel {
public:
ApplyMomentumPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
: PServerKernel(rank_id, pserver_num, worker_num) {}
~ApplyMomentumPSKernelMod() override = default;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
const std::vector<size_t> &input_sizes() const override;
const std::vector<size_t> &output_sizes() const override;
const std::vector<size_t> &workspace_sizes() const override;
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_APPLY_MOMENTUM_PS_KERNEL_H_

View File

@@ -1,103 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/embedding_look_up_proxy_kernel.h"
#include <vector>
#include <algorithm>
#include "ps/worker.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kEmbeddingLookUpProxyInputsNum = 2;
constexpr size_t kEmbeddingLookUpProxyOutputsNum = 1;
void EmbeddingLookUpProxyKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
EmbeddingLookUpCpuKernelMod::InitKernel(kernel_node);
auto input_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0));
auto indices_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1));
auto output_shape = Convert2SizeTClipNeg(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
size_t axis = kShape2dDims - input_shape.size();
if (input_shape.empty() || input_shape.size() > kShape2dDims) {
MS_LOG(EXCEPTION) << "Input shape can not empty or greater than " << kShape2dDims << "-D, but got "
<< input_shape.size();
}
for (auto dim : input_shape) {
input_dims_ *= dim;
}
if (input_dims_ * sizeof(float) > INT_MAX) {
MS_LOG(EXCEPTION) << "PS mode embedding lookup max embedding table size is " << INT_MAX << ", current shape "
<< input_shape << " is too large.";
}
if (mindspore::ps::PSContext::instance()->is_worker()) {
key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
}
std::vector<float> values;
(void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
(void)std::transform(indices_shape.begin(), indices_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
(void)std::transform(output_shape.begin(), output_shape.end(), std::back_inserter(values),
[](size_t dim) -> float { return SizeToFloat(dim); });
MS_LOG(INFO) << "Init embedding lookup proxy kernel, input shape:" << input_shape
<< ", indices_shape:" << indices_shape << ", output_shape:" << output_shape;
if (mindspore::ps::PSContext::instance()->is_worker()) {
mindspore::ps::Worker::GetInstance().AddEmbeddingTable(key_, input_shape[axis]);
mindspore::ps::ParamInitInfoMessage info;
if (!mindspore::ps::Worker::GetInstance().InitPSEmbeddingTable(key_, input_shape, indices_shape, output_shape,
info)) {
MS_LOG(EXCEPTION) << "InitPSEmbeddingTable failed.";
}
}
}
bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kEmbeddingLookUpProxyInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kEmbeddingLookUpProxyOutputsNum, kernel_name_);
auto indices_addr = reinterpret_cast<int *>(inputs[1]->addr);
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
size_t input_size = inputs[1]->size;
size_t output_size = outputs[0]->size;
size_t size = input_size / sizeof(int);
std::vector<int> lookup_ids(size, 0);
std::vector<float> lookup_result(output_size / sizeof(float), 0);
auto ret = memcpy_s(lookup_ids.data(), lookup_ids.size() * sizeof(int), indices_addr, input_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
}
if (!mindspore::ps::Worker::GetInstance().DoPSEmbeddingLookup(key_, lookup_ids, &lookup_result,
mindspore::ps::kEmbeddingLookupCmd)) {
MS_LOG(EXCEPTION) << "DoPSEmbeddingLookup failed.";
}
auto ret2 = memcpy_s(output_addr, outputs[0]->size, lookup_result.data(), output_size);
if (ret2 != EOK) {
MS_LOG(EXCEPTION) << "Lookup result memcpy failed.";
}
return true;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, EmbeddingLookupProxy, EmbeddingLookUpProxyKernel);
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@@ -1,51 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_
#include "plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.h"
#include <vector>
#include "plugin/factory/ms_factory.h"
namespace mindspore {
namespace kernel {
namespace ps {
class EmbeddingLookUpProxyKernel : public EmbeddingLookUpCpuKernelMod {
public:
EmbeddingLookUpProxyKernel() = default;
~EmbeddingLookUpProxyKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
std::vector<KernelAttr> GetOpSupport() override {
static const std::vector<KernelAttr> support_list = {
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32)};
return support_list;
}
private:
size_t key_{0};
size_t input_dims_{1};
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PROXY_KERNEL_H_

View File

@@ -1,114 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h"
#include <vector>
#include <memory>
#include <functional>
#include "kernel/common_utils.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::ps::Util;
constexpr int kAxis = 0;
constexpr size_t kEmbeddingLookUpPSInputSize = 3;
void EmbeddingLookUpPSKernelMod::InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
if (shape_vec.size() < kEmbeddingLookUpPSInputSize) {
MS_LOG(EXCEPTION) << "EmbeddingLookUpPSKernelMod needs " << kEmbeddingLookUpPSInputSize << " input shapes, but got "
<< shape_vec.size();
}
for (auto shape : shape_vec) {
MS_EXCEPTION_IF_NULL(shape);
}
auto input_shape = *(shape_vec[0]);
if (input_shape.empty()) {
MS_LOG(EXCEPTION) << "Input shape can not empty";
}
first_dim_size_ = LongToSize(input_shape[0]);
outer_dim_size_ *= SizeOf(input_shape);
auto indices_shape = *(shape_vec[1]);
indices_lens_ = SizeOf(indices_shape);
size_t output_index = 2;
auto output_shape = *(shape_vec[output_index]);
int64_t offset = 0;
for (size_t i = 0; i < rank_id_; i++) {
offset += Util::LocalShard(input_shape[kAxis], SizeToLong(i), SizeToLong(pserver_num_));
}
offset_ = offset;
// The input shape must be sharded after computing offset_.
Shard(&input_shape, kAxis);
input_shape_ = Convert2SizeT(input_shape);
size_t output_size = sizeof(float) * SizeOf(output_shape);
(void)output_size_list_.emplace_back(output_size);
}
void EmbeddingLookUpPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
if (shapes.empty() || shapes[0].empty()) {
MS_LOG(EXCEPTION) << "Shape can not empty";
}
const auto &indices_shape = shapes[0];
indices_lens_ = LongToSize(indices_shape[0]);
size_t output_size = sizeof(float) * indices_lens_;
for (size_t i = kAxis + 1; i < input_shape_.size(); i++) {
output_size *= input_shape_[i];
}
output_size_list_.clear();
(void)output_size_list_.emplace_back(output_size);
}
bool EmbeddingLookUpPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
return Launch(inputs, workspace, outputs);
}
void EmbeddingLookUpPSKernelMod::UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids,
const float *update_vals, size_t ids_size) {
size_t copy_len = outer_dim_size_ * sizeof(float);
size_t dest_len = copy_len;
for (size_t i = 0; i < ids_size; ++i) {
int index = SizeToInt(lookup_ids[i]) - LongToInt(offset_);
if (index < 0 || index >= SizeToInt(first_dim_size_)) {
MS_LOG(EXCEPTION) << "UpdateEmbeddings index invalid.";
}
auto ret = memcpy_s(embedding_table + IntToSize(index) * outer_dim_size_, dest_len,
update_vals + i * outer_dim_size_, copy_len);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "LookUpTable task memcpy failed.";
}
}
}
const std::vector<size_t> &EmbeddingLookUpPSKernelMod::input_sizes() const { return input_shape_; }
const std::vector<size_t> &EmbeddingLookUpPSKernelMod::output_sizes() const { return GetOutputSizeList(); }
const std::vector<size_t> &EmbeddingLookUpPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
int64_t EmbeddingLookUpPSKernelMod::offset() const { return offset_; }
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@@ -1,53 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/embedding_look_up_cpu_kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
class EmbeddingLookUpPSKernelMod : public EmbeddingLookUpCpuKernelMod, public PServerKernel {
public:
EmbeddingLookUpPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
: PServerKernel(rank_id, pserver_num, worker_num) {}
~EmbeddingLookUpPSKernelMod() override = default;
void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
void ReInit(const std::vector<ShapeVector> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
void UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids, const float *update_vals,
size_t ids_size) override;
const std::vector<size_t> &input_sizes() const override;
const std::vector<size_t> &output_sizes() const override;
const std::vector<size_t> &workspace_sizes() const override;
int64_t offset() const override;
private:
std::vector<size_t> input_shape_;
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_EMBEDDING_LOOK_UP_PS_KERNEL_H_

View File

@@ -1,32 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
void PServerKernel::Shard(ShapeVector *shape, int axis) const {
MS_EXCEPTION_IF_NULL(shape);
if ((*shape).size() <= IntToSize(axis)) {
MS_LOG(EXCEPTION) << "Shape size is invalid.";
}
(*shape)[IntToSize(axis)] =
Util::LocalShard((*shape)[IntToSize(axis)], SizeToLong(rank_id_), SizeToLong(pserver_num_));
}
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@@ -1,59 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/kernel.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::ps::Util;
class PServerKernel {
public:
PServerKernel(size_t rank_id, size_t pserver_num, size_t worker_num)
: rank_id_(rank_id), pserver_num_(pserver_num), worker_num_(worker_num) {}
~PServerKernel() = default;
PServerKernel(const PServerKernel &) = delete;
PServerKernel &operator=(const PServerKernel &) = delete;
virtual void InitKernel(const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
virtual void InitKernel(const CNodePtr &, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) {}
virtual void ReInit(const std::vector<ShapeVector> &) {}
virtual bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) = 0;
virtual void UpdateEmbeddings(float *embedding_table, const size_t *lookup_ids, const float *update_vals,
size_t ids_size) {}
virtual const std::vector<size_t> &input_sizes() const = 0;
virtual const std::vector<size_t> &output_sizes() const = 0;
virtual const std::vector<size_t> &workspace_sizes() const = 0;
virtual int64_t offset() const { return 0; }
protected:
virtual void ReInit(const std::vector<AddressPtr> &) {}
void Shard(ShapeVector *shape, int axis) const;
size_t rank_id_;
size_t pserver_num_;
size_t worker_num_;
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PSERVER_KERNEL_H_

View File

@@ -1,29 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/pull_kernel.h"
namespace mindspore {
namespace kernel {
std::vector<KernelAttr> PullKernelMod::GetOpSupport() {
static const std::vector<KernelAttr> support_list = {
KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32)};
return support_list;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, Pull, PullKernelMod);
} // namespace kernel
} // namespace mindspore

View File

@@ -1,92 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_
#include <vector>
#include <string>
#include "ps/worker.h"
#include "ps/util.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
namespace mindspore {
namespace kernel {
class PullKernelMod : public DeprecatedNativeCpuKernelMod {
public:
PullKernelMod() : key_(UINT64_MAX), keys_size_(sizeof(size_t)), var_size_(sizeof(size_t)) {}
~PullKernelMod() override = default;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &) override {
if (inputs.size() != 2) {
MS_LOG(EXCEPTION) << "Inputs size is " << inputs.size() << ", but PullKernelMod needs 2.";
}
bool init_in_server = mindspore::ps::Worker::GetInstance().GetParamInitInServer(param_name_);
// If init_in_server, forward kernel should run in server too.
if (!init_in_server) {
mindspore::ps::Worker::GetInstance().Pull(key_, inputs[1]->addr, inputs[1]->size);
}
return true;
}
void Init(const CNodePtr &kernel_node) override {
MS_EXCEPTION_IF_NULL(kernel_node);
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but pull needs 2 inputs.";
return;
}
auto key_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
keys_size_ *= SizeOf(key_shape);
auto var_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
var_size_ *= SizeOf(var_shape);
auto param_node = common::AnfAlgo::GetInputNode(kernel_node, 1);
MS_EXCEPTION_IF_NULL(param_node);
param_name_ = param_node->fullname_with_scope();
if (mindspore::ps::PSContext::instance()->is_worker()) {
key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
}
InitSizeLists();
return;
}
std::vector<KernelAttr> GetOpSupport() override;
void InitKernel(const CNodePtr &) override { return; }
protected:
void InitSizeLists() {
input_size_list_.push_back(keys_size_);
input_size_list_.push_back(var_size_);
output_size_list_.push_back(0);
}
private:
size_t key_;
size_t keys_size_;
size_t var_size_;
std::string param_name_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PULL_KERNEL_H_

View File

@@ -1,53 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/push_kernel.h"
#include <tuple>
namespace mindspore {
namespace kernel {
std::vector<std::tuple<KernelAttr, PushKernelMod::PushFunc, PushKernelMod::PushInitFunc>> PushKernelMod::func_list_ = {
{KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeUInt64),
&PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64),
&PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>},
{KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeUInt64),
&PushKernelMod::LaunchKernel<float>, &PushKernelMod::InitFunc<float>}};
std::vector<KernelAttr> PushKernelMod::GetOpSupport() {
std::vector<KernelAttr> support_list;
(void)std::transform(
func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
[](const std::tuple<KernelAttr, PushFunc, PushInitFunc> &tuple_item) { return std::get<0>(tuple_item); });
return support_list;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, Push, PushKernelMod);
} // namespace kernel
} // namespace mindspore

View File

@ -1,117 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_
#include <vector>
#include <algorithm>
#include <tuple>
#include "ps/worker.h"
#include "ps/util.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
namespace mindspore {
namespace kernel {
class PushKernelMod : public DeprecatedNativeCpuKernelMod {
public:
PushKernelMod() : key_(UINT64_MAX) {}
~PushKernelMod() override = default;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override {
return kernel_func_(this, inputs, workspace, outputs);
}
void Init(const CNodePtr &kernel_node) override {
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match) {
MS_LOG(EXCEPTION) << "Push does not support this kernel data type: " << kernel_attr;
}
kernel_func_ = std::get<1>(func_list_[index]);
const size_t kTwoIdx = 2;
init_func_ = std::get<kTwoIdx>(func_list_[index]);
init_func_(this, kernel_node);
}
void InitKernel(const CNodePtr &) override { return; }
std::vector<KernelAttr> GetOpSupport() override;
private:
template <typename T>
void InitFunc(const CNodePtr &kernel_node) {
key_ = common::AnfAlgo::GetNodeAttr<size_t>(kernel_node, kAttrPsKey);
auto optim_input_shapes =
common::AnfAlgo::GetNodeAttr<std::vector<std::vector<int64_t>>>(kernel_node, "optim_input_shapes");
auto only_shape_indices = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "only_shape_indices");
MS_LOG(INFO) << "Key " << key_ << " optimizer input shapes are:" << optim_input_shapes;
MS_LOG(INFO) << "Only init shape indices are " << only_shape_indices;
for (size_t i = 0; i < optim_input_shapes.size(); i++) {
auto shape = optim_input_shapes[i];
mindspore::ps::Worker::GetInstance().SetOptimInputShapes(key_, shape);
if (std::count(only_shape_indices.begin(), only_shape_indices.end(), i) == 0) {
size_t size = sizeof(T);
for (size_t j = 0; j < shape.size(); j++) {
size *= LongToSize(shape[j]);
}
input_size_list_.push_back(size);
}
}
output_size_list_.push_back(sizeof(size_t));
return;
}
template <typename T>
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Outputs size is " << outputs.size() << ", but PushKernelMod needs 1.";
}
std::vector<size_t> keys;
std::vector<uintptr_t> addrs;
std::vector<int64_t> sizes;
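    // Every input slice is pushed under this kernel's single PS key; sizes are
    // element counts (bytes / sizeof(T)), not byte lengths.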
for (auto input : inputs) {
keys.push_back(key_);
addrs.push_back(reinterpret_cast<uintptr_t>(input->addr));
sizes.push_back(SizeToLong(input->size) / SizeToLong(sizeof(T)));
}
mindspore::ps::Worker::GetInstance().Push(keys, addrs, sizes);
auto ret = memcpy_s(outputs[0]->addr, outputs[0]->size, &key_, sizeof(size_t));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
}
return true;
}
using PushFunc =
std::function<bool(PushKernelMod *, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &)>;
using PushInitFunc = std::function<void(PushKernelMod *, const CNodePtr &kernel_node)>;
static std::vector<std::tuple<KernelAttr, PushFunc, PushInitFunc>> func_list_;
PushFunc kernel_func_;
PushInitFunc init_func_;
size_t key_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_PS_PUSH_KERNEL_H_
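The func_list_ table in push_kernel.cc pairs each supported KernelAttr with a launch function and an init function, and Init() picks a row with MatchKernelAttr. A reduced sketch of that table-driven dispatch pattern (the string attr encoding and all names here are illustrative, not the real KernelAttr API):
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

// Stand-ins for KernelAttr and the launch functor.
using Attr = std::string;
using LaunchFn = std::function<void()>;

struct Dispatcher {
  std::vector<std::tuple<Attr, LaunchFn>> table;
  // Analogue of MatchKernelAttr: linear scan for the first matching row.
  bool Select(const Attr &attr, LaunchFn *out) const {
    for (const auto &row : table) {
      if (std::get<0>(row) == attr) {
        *out = std::get<1>(row);
        return true;
      }
    }
    return false;
  }
};

int main() {
  Dispatcher d;
  d.table.emplace_back("f32,i32->u64", [] { std::cout << "float path\n"; });
  LaunchFn fn;
  if (d.Select("f32,i32->u64", &fn)) {
    fn();  // prints "float path"
  }
  return 0;
}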

View File

@ -1,119 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h"
#include <memory>
#include "kernel/common_utils.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyAdamPSInputsShapeSize = 11;
void SparseApplyAdamPSKernelMod::InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyAdamPSInputsShapeSize) {
MS_LOG(EXCEPTION) << "SparseApplyAdamPSKernelMod needs 10 input shapes, but got " << shape_vec.size();
}
ShapeVector &var_shape = *(shape_vec[var_index_]);
ShapeVector &m_shape = *(shape_vec[m_index_]);
ShapeVector &v_shape = *(shape_vec[v_index_]);
const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
const ShapeVector &indices_shape = *(shape_vec[indices_index_]);
Shard(&var_shape, 0);
Shard(&m_shape, 0);
Shard(&v_shape, 0);
if (var_shape.empty()) {
MS_LOG(EXCEPTION) << "var must be at least 1D";
}
if (!IsSameShape(var_shape, m_shape)) {
MS_LOG(EXCEPTION) << "var and m must have the same shape";
}
if (!IsSameShape(var_shape, v_shape)) {
MS_LOG(EXCEPTION) << "var and v must have the same shape";
}
if (var_shape.size() != grad_shape.size()) {
MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
}
var_first_dim_size_ = LongToSize(var_shape[0]);
for (size_t i = 1; i < var_shape.size(); ++i) {
if (var_shape[i] != grad_shape[i]) {
MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
}
var_outer_dim_size_ *= LongToSize(var_shape[i]);
}
if (indices_shape.size() != 1) {
MS_LOG(EXCEPTION) << "indices must be 1D";
}
indices_size_ = LongToSize(indices_shape[0]);
if (grad_shape[0] != SizeToLong(indices_size_)) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = common::AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
}
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
(void)workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
}
void SparseApplyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
if (shapes.empty() || shapes[0].empty()) {
MS_LOG(EXCEPTION) << "Shape is empty";
}
const auto &indices_shape = shapes[0];
indices_size_ = LongToSize(indices_shape[0]);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyAdamPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
if (inputs.size() < kSparseApplyAdamPSInputsShapeSize) {
MS_LOG(EXCEPTION) << "Input numbers can not less to " << kSparseApplyAdamPSInputsShapeSize << ", but got "
<< inputs.size();
}
const auto &indices_addr = inputs[indices_index_];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
workspace_size_list_[1] = indices_size_ * sizeof(int);
}
bool SparseApplyAdamPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
if (indices_size_ == 0) {
return true;
}
return Launch(inputs, workspace, outputs);
}
const std::vector<size_t> &SparseApplyAdamPSKernelMod::input_sizes() const { return GetInputSizeList(); }
const std::vector<size_t> &SparseApplyAdamPSKernelMod::output_sizes() const { return GetOutputSizeList(); }
const std::vector<size_t> &SparseApplyAdamPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
} // namespace ps
} // namespace kernel
} // namespace mindspore
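The shape bookkeeping above drives the server-side workspace sizes. A worked numeric sketch with hypothetical shapes (var = [10, 8], grad = [4, 8], indices = [4], two workers), mirroring the first two workspace entries:
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> var_shape = {10, 8};
  std::vector<int64_t> indices_shape = {4};
  size_t worker_num = 2;
  size_t var_first_dim_size = static_cast<size_t>(var_shape[0]);  // 10 rows
  size_t var_outer_dim_size = 1;
  for (size_t i = 1; i < var_shape.size(); ++i) {
    var_outer_dim_size *= static_cast<size_t>(var_shape[i]);  // 8 floats per row
  }
  size_t indices_size = static_cast<size_t>(indices_shape[0]);  // 4 indices
  size_t ws_grad = indices_size * var_outer_dim_size * sizeof(float) * worker_num;  // 4*8*4*2 = 256 bytes
  size_t ws_indices = indices_size * sizeof(int) * worker_num;                      // 4*4*2 = 32 bytes
  std::cout << var_first_dim_size << " " << ws_grad << " " << ws_indices << "\n";
  return 0;
}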

View File

@ -1,56 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_adam_cpu_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyAdamCpuKernelMod;
class SparseApplyAdamPSKernelMod : public SparseApplyAdamCpuKernelMod, public PServerKernel {
public:
SparseApplyAdamPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
: PServerKernel(rank_id, pserver_num, worker_num) {}
~SparseApplyAdamPSKernelMod() override = default;
void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
void ReInit(const std::vector<ShapeVector> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
const std::vector<size_t> &input_sizes() const override;
const std::vector<size_t> &output_sizes() const override;
const std::vector<size_t> &workspace_sizes() const override;
protected:
void ReInit(const std::vector<AddressPtr> &) override;
size_t var_index_{0};
size_t m_index_{1};
size_t v_index_{2};
size_t grad_index_{9};
size_t indices_index_{10};
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_ADAM_PS_KERNEL_H_

View File

@ -1,130 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyFtrlPSInputSize = 5;
void SparseApplyFtrlPSKernelMod::InitKernel(const CNodePtr &cnode,
const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyFtrlPSInputSize) {
MS_LOG(EXCEPTION) << "SparseApplyAdamPSKernelMod needs " << kSparseApplyFtrlPSInputSize << " input shapes, but got "
<< shape_vec.size();
}
auto var_shape = *(shape_vec[var_index_]);
auto accum_shape = *(shape_vec[accum_index_]);
auto linear_shape = *(shape_vec[linear_index_]);
auto grad_shape = *(shape_vec[grad_index_]);
auto indices_shape = *(shape_vec[indices_index_]);
Shard(&var_shape, 0);
Shard(&accum_shape, 0);
Shard(&linear_shape, 0);
if (var_shape.size() != grad_shape.size()) {
MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
}
if (var_shape.empty()) {
MS_LOG(EXCEPTION) << "var must be at least 1D";
} else {
var_first_dim_size_ = LongToSize(var_shape[0]);
}
for (size_t i = 1; i < var_shape.size(); ++i) {
if (var_shape[i] != grad_shape[i]) {
MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
}
var_outer_dim_size_ *= LongToSize(var_shape[i]);
}
if (indices_shape.size() != 1) {
MS_LOG(EXCEPTION) << "indices must be a 1D vector";
}
indices_size_ = LongToSize(indices_shape[0]);
if (grad_shape[0] != SizeToLong(indices_size_)) {
MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
}
init_accum_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "init_accum");
if (init_accum_ < 0) {
MS_LOG(EXCEPTION) << "init_accum must be a non-negative scalar";
}
lr_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "lr");
if (lr_ <= 0) {
MS_LOG(EXCEPTION) << "lr must be a positive scalar";
}
l1_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "l1");
if (l1_ < 0) {
MS_LOG(EXCEPTION) << "l1 must be a non-negative scalar";
}
l2_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "l2");
if (l2_ < 0) {
MS_LOG(EXCEPTION) << "l2 must be a non-negative scalar";
}
lr_power_ = common::AnfAlgo::GetNodeAttr<float>(cnode, "lr_power");
if (lr_power_ > 0) {
MS_LOG(EXCEPTION) << "lr_power must be a non-positive scalar";
}
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
}
void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
if (shapes.empty() || shapes[0].empty()) {
MS_LOG(EXCEPTION) << "Shape can not empty";
}
const auto &indices_shape = shapes[0];
indices_size_ = LongToSize(indices_shape[0]);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyFtrlPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
if (inputs.size() < kSparseApplyFtrlPSInputSize) {
MS_LOG(EXCEPTION) << "Input numbers can not be less than " << kSparseApplyFtrlPSInputSize << ", but got "
<< inputs.size();
}
const auto &indices_addr = inputs[indices_index_];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
bool SparseApplyFtrlPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
if (indices_size_ == 0) {
return true;
}
return Launch(inputs, workspace, outputs);
}
const std::vector<size_t> &SparseApplyFtrlPSKernelMod::input_sizes() const { return GetInputSizeList(); }
const std::vector<size_t> &SparseApplyFtrlPSKernelMod::output_sizes() const { return GetOutputSizeList(); }
const std::vector<size_t> &SparseApplyFtrlPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@ -1,59 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyFtrlCpuKernelMod;
class SparseApplyFtrlPSKernelMod : public SparseApplyFtrlCpuKernelMod, public PServerKernel {
public:
SparseApplyFtrlPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
: PServerKernel(rank_id, pserver_num, worker_num), init_accum_(0.1) {}
~SparseApplyFtrlPSKernelMod() override = default;
void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
void ReInit(const std::vector<ShapeVector> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
const std::vector<size_t> &input_sizes() const override;
const std::vector<size_t> &output_sizes() const override;
const std::vector<size_t> &workspace_sizes() const override;
  float init_accum() const { return init_accum_; }
protected:
void ReInit(const std::vector<AddressPtr> &) override;
float init_accum_{0.1};
size_t var_index_{0};
size_t accum_index_{1};
size_t linear_index_{2};
size_t grad_index_{3};
size_t indices_index_{4};
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_SPARSE_APPLY_FTRL_PS_KERNEL_H_

View File

@ -1,120 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h"
#include <memory>
#include "kernel/common_utils.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "ps/util.h"
namespace mindspore {
namespace kernel {
namespace ps {
constexpr size_t kSparseApplyLazyAdamPSInputsSize = 11;
void SparseApplyLazyAdamPSKernelMod::InitKernel(
const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(shapes);
const std::vector<std::shared_ptr<ShapeVector>> &shape_vec = *shapes;
if (shape_vec.size() < kSparseApplyLazyAdamPSInputsSize) {
MS_LOG(EXCEPTION) << "SparseApplyLazyAdamPSKernelMod needs " << kSparseApplyLazyAdamPSInputsSize
<< " input shapes, but got " << shape_vec.size();
}
ShapeVector &var_shape = *(shape_vec[var_index_]);
ShapeVector &m_shape = *(shape_vec[m_index_]);
ShapeVector &v_shape = *(shape_vec[v_index_]);
const ShapeVector &grad_shape = *(shape_vec[grad_index_]);
const ShapeVector &indices_shape = *(shape_vec[indices_index_]);
Shard(&var_shape, 0);
Shard(&m_shape, 0);
Shard(&v_shape, 0);
if (var_shape.empty()) {
MS_LOG(EXCEPTION) << "var must be at least 1D";
}
if (var_shape.size() != grad_shape.size()) {
MS_LOG(EXCEPTION) << "var and grad must have the same shape size";
}
if (!IsSameShape(var_shape, m_shape)) {
MS_LOG(EXCEPTION) << "var and m must have the same shape";
}
if (!IsSameShape(var_shape, v_shape)) {
MS_LOG(EXCEPTION) << "var and v must have the same shape";
}
var_first_dim_size_ = LongToSize(var_shape[0]);
for (size_t i = 1; i < var_shape.size(); ++i) {
if (var_shape[i] != grad_shape[i]) {
MS_LOG(EXCEPTION) << "The shape of var and grad must be equal in dimension " << i;
}
var_outer_dim_size_ *= LongToSize(var_shape[i]);
}
if (indices_shape.size() != 1) {
MS_LOG(EXCEPTION) << "indices must be 1D";
}
indices_size_ = LongToSize(indices_shape[0]);
if (grad_shape[0] != SizeToLong(indices_size_)) {
MS_LOG(ERROR) << "The first dimension of grad shape must be equal to indices";
}
if (common::AnfAlgo::HasNodeAttr(USE_NESTEROV, cnode)) {
use_nesterov_ = common::AnfAlgo::GetNodeAttr<bool>(cnode, USE_NESTEROV);
}
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_);
(void)workspace_size_list_.emplace_back(indices_size_ * sizeof(int) * worker_num_);
}
void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<ShapeVector> &shapes) {
if (shapes.empty() || shapes[0].empty()) {
MS_LOG(EXCEPTION) << "Shape can not empty";
}
const auto &indices_shape = shapes[0];
indices_size_ = LongToSize(indices_shape[0]);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
void SparseApplyLazyAdamPSKernelMod::ReInit(const std::vector<AddressPtr> &inputs) {
if (inputs.size() < kSparseApplyLazyAdamPSInputsSize) {
MS_LOG(EXCEPTION) << "Input shape size can not be less than " << kSparseApplyLazyAdamPSInputsSize << ", but got "
<< inputs.size();
}
const auto &indices_addr = inputs[indices_index_];
indices_size_ = indices_addr->size / sizeof(int);
workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float) * worker_num_;
workspace_size_list_[1] = indices_size_ * sizeof(int) * worker_num_;
}
bool SparseApplyLazyAdamPSKernelMod::Execute(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
ReInit(inputs);
if (indices_size_ == 0) {
return true;
}
return Launch(inputs, workspace, outputs);
}
const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::input_sizes() const { return GetInputSizeList(); }
const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::output_sizes() const { return GetOutputSizeList(); }
const std::vector<size_t> &SparseApplyLazyAdamPSKernelMod::workspace_sizes() const { return GetWorkspaceSizeList(); }
} // namespace ps
} // namespace kernel
} // namespace mindspore

View File

@ -1,55 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/sparse_apply_lazy_adam_cpu_kernel.h"
namespace mindspore {
namespace kernel {
namespace ps {
using mindspore::kernel::SparseApplyLazyAdamCpuKernelMod;
class SparseApplyLazyAdamPSKernelMod : public SparseApplyLazyAdamCpuKernelMod, public PServerKernel {
public:
SparseApplyLazyAdamPSKernelMod(size_t rank_id, size_t pserver_num, size_t worker_num)
: PServerKernel(rank_id, pserver_num, worker_num) {}
~SparseApplyLazyAdamPSKernelMod() override = default;
void InitKernel(const CNodePtr &cnode, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &) override;
void ReInit(const std::vector<ShapeVector> &) override;
bool Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
const std::vector<size_t> &input_sizes() const override;
const std::vector<size_t> &output_sizes() const override;
const std::vector<size_t> &workspace_sizes() const override;
protected:
void ReInit(const std::vector<AddressPtr> &) override;
size_t var_index_{0};
size_t m_index_{1};
size_t v_index_{2};
size_t grad_index_{9};
size_t indices_index_{10};
};
} // namespace ps
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSE_APPLY_LAZY_ADAM_PS_KERNEL_H_

View File

@ -18,7 +18,6 @@
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "utils/ms_context.h"
#include "include/common/utils/convert_utils.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "plugin/device/gpu/hal/device/gpu_device_manager.h"
#include "plugin/device/gpu/hal/device/gpu_common.h"
namespace mindspore {
@ -73,9 +72,6 @@ bool GPUMemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList
void GPUMemoryManager::Initialize() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (ps::ps_cache_instance.initialized_ps_cache()) {
return;
}
auto device_addr = MallocMemFromMemPool(1, false);
if (!device_addr) {
MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";

View File

@ -1,112 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/gpu/hal/device/ps/gpu_ps_cache.h"
#include "ps/ps_cache/ps_cache_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/hash_impl.cuh"
#include "plugin/device/gpu/hal/device/gpu_common.h"
#include "plugin/device/gpu/hal/device/cuda_driver.h"
#include "plugin/device/gpu/hal/device/gpu_memory_allocator.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace ps {
namespace gpu {
MS_REG_PS_CACHE(kGPUDevice, GPUPsCache);
bool GPUPsCache::InitDevice(uint32_t device_id, const void *) {
bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id));
if (!ret) {
MS_LOG(ERROR) << "Failed to set device id:" << device_id;
return false;
}
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaStreamCreate(reinterpret_cast<CUstream_st **>(&stream_)),
"Cuda create stream failed");
return true;
}
void *GPUPsCache::MallocMemory(size_t size) {
return device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(size);
}
void GPUPsCache::FreeMemory(void *device_addr) {
device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(device_addr);
}
bool GPUPsCache::RecordEvent() {
event_.reset(new cudaEvent_t());
MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventCreate(&(*event_)), "Cuda create event failed");
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventRecord(*event_, reinterpret_cast<cudaStream_t>(stream_)),
"Cuda record event failed");
return true;
}
bool GPUPsCache::SynchronizeEvent() {
MS_ERROR_IF_NULL_W_RET_VAL(event_, false);
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventSynchronize(*event_), "Cuda sync event failed");
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaEventDestroy(*event_), "Cuda destroy event failed");
return true;
}
bool GPUPsCache::SynchronizeStream() {
MS_ERROR_IF_NULL_W_RET_VAL(stream_, false);
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_)),
"Cuda sync stream failed");
return true;
}
bool GPUPsCache::CopyHostMemToDevice(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(
cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_)),
"Cuda memcpy failed");
return true;
}
bool GPUPsCache::CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
MS_ERROR_IF_NULL(dst);
MS_ERROR_IF_NULL(src);
CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(
cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream_)),
"Cuda memcpy failed");
return true;
}
bool GPUPsCache::HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t,
size_t embedding_size, size_t swap_out_size) {
MS_ERROR_IF_NULL(hash_table_addr);
MS_ERROR_IF_NULL(swap_out_value_addr);
MS_ERROR_IF_NULL(swap_out_index_addr);
DoHashSwapOut(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_out_value_addr),
reinterpret_cast<int *>(swap_out_index_addr), swap_out_size, embedding_size,
reinterpret_cast<cudaStream_t>(stream_));
return true;
}
bool GPUPsCache::HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t,
size_t embedding_size, size_t swap_in_size) {
MS_ERROR_IF_NULL(hash_table_addr);
MS_ERROR_IF_NULL(swap_in_value_addr);
MS_ERROR_IF_NULL(swap_in_index_addr);
DoHashSwapIn(reinterpret_cast<float *>(hash_table_addr), reinterpret_cast<float *>(swap_in_value_addr),
reinterpret_cast<int *>(swap_in_index_addr), swap_in_size, embedding_size,
reinterpret_cast<cudaStream_t>(stream_));
return true;
}
} // namespace gpu
} // namespace ps
} // namespace mindspore
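The cache above issues all host/device copies asynchronously on a private stream and uses a recorded event to wait only for the work it enqueued. A minimal sketch of that record/synchronize pattern (assumes a CUDA toolkit is available; error handling is abbreviated):
#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  cudaStream_t stream = nullptr;
  cudaEvent_t event = nullptr;
  if (cudaStreamCreate(&stream) != cudaSuccess) return 1;
  if (cudaEventCreate(&event) != cudaSuccess) return 1;
  const size_t size = 16 * sizeof(float);
  float host[16] = {0};
  void *dev = nullptr;
  if (cudaMalloc(&dev, size) != cudaSuccess) return 1;
  cudaMemcpyAsync(dev, host, size, cudaMemcpyHostToDevice, stream);  // CopyHostMemToDevice analogue
  cudaEventRecord(event, stream);  // RecordEvent analogue
  cudaEventSynchronize(event);     // SynchronizeEvent analogue: wait for the copy only
  cudaEventDestroy(event);
  cudaFree(dev);
  cudaStreamDestroy(stream);
  printf("copy complete\n");
  return 0;
}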

View File

@ -1,50 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
#include <cuda_runtime_api.h>
#include <memory>
#include "ps/ps_cache/ps_cache_basic.h"
namespace mindspore {
namespace ps {
namespace gpu {
class GPUPsCache : public PsCacheBasic {
public:
GPUPsCache() = default;
~GPUPsCache() override = default;
bool InitDevice(uint32_t device_id, const void *context) override;
void *MallocMemory(size_t size) override;
void FreeMemory(void *device_addr) override;
bool RecordEvent() override;
bool SynchronizeEvent() override;
bool SynchronizeStream() override;
bool CopyHostMemToDevice(void *dst, const void *src, size_t size) override;
bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) override;
bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_out_size) override;
bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr, size_t cache_vocab_size,
size_t embedding_size, size_t swap_in_size) override;
private:
std::unique_ptr<cudaEvent_t> event_;
};
} // namespace gpu
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_

View File

@ -88,7 +88,7 @@
#include "kernel/graph_kernel_info.h"
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_context.h"
#endif
namespace mindspore {
@ -362,12 +362,6 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
auto input_node = input_nodes[i];
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
#ifdef WITH_BACKEND
const std::string &param_name = input_node->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
continue;
}
#endif
auto pk_node = input_node->cast<ParameterPtr>();
auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
MS_EXCEPTION_IF_NULL(device_address);
@ -443,9 +437,6 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
GraphKernelOptimize(graph);
// Start gpu kernel runtime
StartKernelRT();
#ifdef WITH_BACKEND
InitPsWorker(graph);
#endif
// Assign CUDA streams
AssignStream(graph);
#ifdef ENABLE_DUMP_IR
@ -525,11 +516,6 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
E2eDump::UpdateIterOldRTDump(kernel_graph.get());
#endif
#ifdef WITH_BACKEND
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
#endif
}
// GPU old runtime.
@ -563,12 +549,6 @@ void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph)
int kernel_num = kernel_graph->execution_order().size();
int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
for (int64_t i = 0; i < loopsize; i++) {
#ifdef WITH_BACKEND
std::string channel_name;
if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
ps::ps_cache_instance.IncreaseGraphStep(channel_name);
}
#endif
Execute(kernel_graph);
}
}

View File

@ -14,7 +14,6 @@ if(NOT ENABLE_CPU OR WIN32)
list(REMOVE_ITEM _PS_SRC_FILES "core/communicator/tcp_server.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/node.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/node_manager.cc")
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_cache_manager.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/worker_node.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/ps_worker_node.cc")
list(REMOVE_ITEM _PS_SRC_FILES "core/server_node.cc")
@ -44,13 +43,6 @@ if(NOT ENABLE_CPU OR WIN32)
list(REMOVE_ITEM _PS_SRC_FILES "core/instance_manager.cc")
endif()
if(NOT ENABLE_D)
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ascend/ascend_ps_cache.cc")
endif()
if(NOT ENABLE_GPU)
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/gpu/gpu_ps_cache.cc")
endif()
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_data/ps_data_prefetch.cc")
list(REMOVE_ITEM _PS_SRC_FILES "ps_cache/ps_data/ps_data_channel.cc")

View File

@ -1,27 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/embedding_table_shard_metadata.h"
namespace mindspore {
namespace ps {
uint64_t EmbeddingTableShardMetadata::begin() const { return begin_; }
uint64_t EmbeddingTableShardMetadata::end() const { return end_; }
uint64_t EmbeddingTableShardMetadata::size() const { return end_ - begin_; }
} // namespace ps
} // namespace mindspore

View File

@ -1,40 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_
#define MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_
#include <iostream>
#include "utils/log_adapter.h"
namespace mindspore {
namespace ps {
class EmbeddingTableShardMetadata {
public:
explicit EmbeddingTableShardMetadata(uint64_t begin, uint64_t end) : begin_(begin), end_(end) {}
virtual ~EmbeddingTableShardMetadata() = default;
uint64_t begin() const;
uint64_t end() const;
uint64_t size() const;
private:
uint64_t begin_;
uint64_t end_;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_EMBEDDING_TABLE_SHARD_METADATA_H_
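Each EmbeddingTableShardMetadata describes one server's half-open [begin, end) row range, with size() = end - begin. As an illustration only (the real shard layout comes from Util::AllRankLocalShard, which this sketch does not reproduce), an even partition of vocabulary rows across servers could look like:
#include <cstdint>
#include <iostream>
#include <vector>

struct Shard { uint64_t begin; uint64_t end; };  // [begin, end), size = end - begin

// Hypothetical even split of `vocab` rows over `servers` shards.
std::vector<Shard> Partition(uint64_t vocab, uint64_t servers) {
  std::vector<Shard> shards;
  uint64_t base = vocab / servers, rem = vocab % servers, begin = 0;
  for (uint64_t i = 0; i < servers; ++i) {
    uint64_t len = base + (i < rem ? 1 : 0);
    shards.push_back({begin, begin + len});
    begin += len;
  }
  return shards;
}

int main() {
  for (const auto &s : Partition(10, 3)) {  // [0,4) [4,7) [7,10)
    std::cout << "[" << s.begin << ", " << s.end << ") size=" << s.end - s.begin << "\n";
  }
  return 0;
}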

View File

@ -1,414 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/optimizer_info.h"
#include <map>
#include <memory>
#include <string>
#include <functional>
#include "ps/util.h"
namespace mindspore {
namespace ps {
void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) {
MS_EXCEPTION_IF_NULL(workspace);
workspaces_.push_back(workspace);
}
const std::vector<AddressPtr> &OptimizerInfo::inputs() const { return inputs_; }
const std::vector<AddressPtr> &OptimizerInfo::workspaces() const { return workspaces_; }
const std::vector<AddressPtr> &OptimizerInfo::outputs() const { return outputs_; }
bool OptimizerInfo::IsSparse() const { return false; }
const size_t OptimizerInfo::indice_size() const { return 0; }
size_t OptimizerInfo::grad_index() { return 0; }
size_t OptimizerInfo::indices_index() { return 0; }
template <typename T>
void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
const Lengths &lens) {
MS_EXCEPTION_IF_NULL(data);
if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported.";
}
const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
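  // origin_input_map locates the named input inside inputs_ (kernel input
  // order); ps_send_index_map locates it inside the packed payload sent by the
  // worker, which is the order that `lens` describes.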
if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
}
size_t origin_index = origin_input_map.at(input_name);
size_t ps_send_index = ps_send_index_map.at(input_name);
if (ps_send_index >= lens.size() || origin_index >= inputs_.size()) {
MS_LOG(EXCEPTION) << "Index is out of bound for optimizer " << optim_type << ", origin_index:" << origin_index
<< ", ps_send_index:" << ps_send_index;
}
EXC_IF_VEC_IDX_OOB(lens, ps_send_index);
size_t size = IntToSize(lens[ps_send_index]) * sizeof(T);
int offset = std::accumulate(lens.begin(), lens.begin() + SizeToInt(ps_send_index), 0, std::plus<int>());
AddressPtr optim_input = inputs_[origin_index];
MS_EXCEPTION_IF_NULL(optim_input);
void *dst_data = optim_input->addr;
T *src_data = reinterpret_cast<T *>(data) + offset;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t ret = memcpy_s(optim_input->addr, optim_input->size, src_data, size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
return;
}
void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
MS_EXCEPTION_IF_NULL(gradient()->addr);
float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
size_t size = gradient()->size / sizeof(float);
size_t grad_index = this->grad_index();
size_t grad_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += IntToSize(lengths[i]);
}
float *grad_data = const_cast<float *>(values.data()) + grad_offset;
MS_EXCEPTION_IF_NULL(grad_data);
#define google mindspore_private
CHECK_EQ(size, IntToSize(lengths[grad_index]));
#undef google
for (size_t i = 0; i < size; i++) {
accum_grad_data[i] += grad_data[i];
}
}
void DenseOptimInfo::ComputeMean(const std::vector<ShapeVector> &, size_t n, size_t, size_t) {
if (n > 1) {
MS_EXCEPTION_IF_NULL(gradient()->addr);
float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
size_t size = gradient()->size / sizeof(float);
for (size_t i = 0; i < size; i++) {
accum_grad_data[i] /= n;
}
}
}
void DenseOptimInfo::Reset() {
MS_EXCEPTION_IF_NULL(gradient()->addr);
errno_t ret = memset_s(gradient()->addr, gradient()->size, 0x00, gradient()->size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
return;
}
}
void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
// Append grad data to the end
MS_EXCEPTION_IF_NULL(gradient()->addr);
float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
size_t grad_index = this->grad_index();
size_t grad_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += IntToSize(lengths[i]);
}
float *incr_grad_data = const_cast<float *>(values.data()) + grad_offset;
MS_EXCEPTION_IF_NULL(incr_grad_data);
size_t incr_grad_size = IntToSize(lengths[grad_index]) * sizeof(float);
size_t dst_size = incr_grad_size;
size_t src_size = incr_grad_size;
void *dst_data = accum_grad_data + grads_offset_;
void *src_data = incr_grad_data;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
grads_offset_ += IntToSize(lengths[grad_index]);
gradient()->size += incr_grad_size;
// Append indice data to the end
MS_EXCEPTION_IF_NULL(indices()->addr);
int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
MS_EXCEPTION_IF_NULL(accum_indices_data);
size_t indices_index = this->indices_index();
size_t indice_offset = 0;
for (size_t i = 0; i < indices_index; i++) {
indice_offset += IntToSize(lengths[i]);
}
void *incr_indice_data_temp = const_cast<float *>(values.data()) + indice_offset;
MS_EXCEPTION_IF_NULL(incr_indice_data_temp);
int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
MS_EXCEPTION_IF_NULL(incr_indice_data);
size_t incr_indice_size = lengths[indices_index];
size_t incr_indice_data_size = incr_indice_size * sizeof(int);
dst_size = incr_indice_data_size;
src_size = incr_indice_data_size;
dst_data = accum_indices_data + indices_offset_;
src_data = incr_indice_data;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t ret2 = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret2 != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret2 << ")";
return;
}
indices_offset_ += IntToSize(lengths[indices_index]);
indices()->size += incr_indice_data_size;
}
void SparseOptimInfo::ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {
if (n == 0 || indices()->size == 0) {
MS_LOG(EXCEPTION) << "The size of shapes or indices are 0.";
}
size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
size_t segment_size = gradient()->size / indices()->size;
std::vector<float> new_grad(indices_size * segment_size);
std::vector<int> new_indices(indices_size);
mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});
if (shapes.size() < 2 || shapes[1].empty()) {
MS_LOG(EXCEPTION) << "No input shape found";
}
auto input_shapes = shapes[1];
if (input_shapes.size() == 0) {
MS_LOG(EXCEPTION) << "Invalid input shapes";
}
size_t first_dim_size = input_shapes.front();
size_t outer_dim_size = segment_size;
if (first_dim_size == 0 || outer_dim_size == 0) {
MS_LOG(ERROR) << "Invalid first dim size";
}
MS_EXCEPTION_IF_NULL(gradient()->addr);
MS_EXCEPTION_IF_NULL(indices()->addr);
float *grad_data = reinterpret_cast<float *>(gradient()->addr);
int *indices_data = reinterpret_cast<int *>(indices()->addr);
if (sharded_) {
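    // A sharded embedding table receives global row indices; subtracting the
    // rows owned by all lower-ranked servers converts them to indices into
    // this server's local shard.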
auto original_row_count = input_shapes.front();
if (original_row_count > 0) {
size_t offset = 0;
std::map<int64_t, int64_t> rank_dims =
Util::AllRankLocalShard(original_row_count, SizeToLong(rank_id), SizeToLong(server_num));
for (size_t i = 0; i < rank_id; i++) {
if (rank_dims.count(i) == 0) {
MS_LOG(EXCEPTION) << "No local shard number for rank " << i;
}
offset += LongToSize(rank_dims[i]);
}
for (size_t j = 0; j < indices_size; j++) {
indices_data[j] -= SizeToInt(offset);
}
}
}
Util::ReduceSparseGradient(grad_data, indices_data, indices_size, segment_size, first_dim_size, outer_dim_size,
&unique_sparse_grad);
size_t reduced_grad_size = unique_sparse_grad.indices_size_ * segment_size * sizeof(float);
MS_EXCEPTION_IF_NULL(unique_sparse_grad.value_);
errno_t ret = memcpy_s(gradient()->addr, gradient()->size, unique_sparse_grad.value_, reduced_grad_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
size_t reduced_indice_size = unique_sparse_grad.indices_size_ * sizeof(int);
MS_EXCEPTION_IF_NULL(unique_sparse_grad.indices_);
ret = memcpy_s(indices()->addr, indices()->size, unique_sparse_grad.indices_, reduced_indice_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
gradient()->size = reduced_grad_size;
indices()->size = reduced_indice_size;
for (size_t i = 0; i < unique_sparse_grad.indices_size_ * segment_size; i++) {
grad_data[i] = grad_data[i] / n;
}
}
void SparseOptimInfo::Reset() {
gradient()->size = 0;
indices()->size = 0;
grads_offset_ = 0;
indices_offset_ = 0;
}
MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
const AddressPtr &learning_rate, const AddressPtr &gradient,
const AddressPtr &momentum) {
MS_EXCEPTION_IF_NULL(weight);
MS_EXCEPTION_IF_NULL(accumulate);
MS_EXCEPTION_IF_NULL(learning_rate);
MS_EXCEPTION_IF_NULL(gradient);
MS_EXCEPTION_IF_NULL(momentum);
inputs_.push_back(weight);
inputs_.push_back(accumulate);
inputs_.push_back(learning_rate);
inputs_.push_back(gradient);
inputs_.push_back(momentum);
}
void MomentumOptimInfo::Update(const Values &values, const Lengths &lens) {
UpdateOptimInputValue<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
}
const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
const AddressPtr &MomentumOptimInfo::gradient() {
size_t origin_grad_index = kMomentumOriginIdx.at("grad");
EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
return inputs_[origin_grad_index];
}
const AddressPtr &MomentumOptimInfo::indices() {
size_t origin_grad_index = kMomentumOriginIdx.at("grad");
EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
return inputs_[origin_grad_index];
}
size_t MomentumOptimInfo::grad_index() {
size_t ps_grad_index = kMomentumPSSendIdx.at("grad");
return ps_grad_index;
}
SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v,
const AddressPtr &beta1_power, const AddressPtr &beta2_power,
const AddressPtr &learning_rate, const AddressPtr &beta1,
const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
const AddressPtr &indices, bool sharded) {
MS_EXCEPTION_IF_NULL(weight);
MS_EXCEPTION_IF_NULL(m);
MS_EXCEPTION_IF_NULL(v);
MS_EXCEPTION_IF_NULL(beta1_power);
MS_EXCEPTION_IF_NULL(beta2_power);
MS_EXCEPTION_IF_NULL(learning_rate);
MS_EXCEPTION_IF_NULL(beta1);
MS_EXCEPTION_IF_NULL(beta2);
MS_EXCEPTION_IF_NULL(epsilon);
MS_EXCEPTION_IF_NULL(grad);
MS_EXCEPTION_IF_NULL(indices);
inputs_.push_back(weight);
inputs_.push_back(m);
inputs_.push_back(v);
inputs_.push_back(beta1_power);
inputs_.push_back(beta2_power);
inputs_.push_back(learning_rate);
inputs_.push_back(beta1);
inputs_.push_back(beta2);
inputs_.push_back(epsilon);
inputs_.push_back(grad);
inputs_.push_back(indices);
grads_offset_ = grad->size / sizeof(float);
indices_offset_ = indices->size / sizeof(int);
sharded_ = sharded;
}
void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
UpdateOptimInputValue<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
UpdateOptimInputValue<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
}
const AddressPtr &SparseAdamOptimInfo::gradient() {
size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
return inputs_[origin_grad_index];
}
const AddressPtr &SparseAdamOptimInfo::indices() {
size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
return inputs_[origin_indices_index];
}
bool SparseAdamOptimInfo::IsSparse() const { return true; }
size_t SparseAdamOptimInfo::grad_index() {
size_t ps_grad_index = kSparseAdamPSSendIdx.at("grad");
return ps_grad_index;
}
size_t SparseAdamOptimInfo::indices_index() {
size_t ps_indices_index = kSparseAdamPSSendIdx.at("indices");
return ps_indices_index;
}
SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
const AddressPtr &grad, const AddressPtr &indices, bool sharded) {
MS_EXCEPTION_IF_NULL(weight);
MS_EXCEPTION_IF_NULL(accum);
MS_EXCEPTION_IF_NULL(linear);
MS_EXCEPTION_IF_NULL(grad);
MS_EXCEPTION_IF_NULL(indices);
inputs_.push_back(weight);
inputs_.push_back(accum);
inputs_.push_back(linear);
inputs_.push_back(grad);
inputs_.push_back(indices);
grads_offset_ = grad->size / sizeof(float);
indices_offset_ = indices->size / sizeof(int);
sharded_ = sharded;
}
const AddressPtr &SparseFtrlOptimInfo::gradient() {
size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
return inputs_[origin_grad_index];
}
const AddressPtr &SparseFtrlOptimInfo::indices() {
size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
return inputs_[origin_indices_index];
}
bool SparseFtrlOptimInfo::IsSparse() const { return true; }
size_t SparseFtrlOptimInfo::grad_index() {
size_t ps_grad_index = kSparseFtrlPSSendIdx.at("grad");
return ps_grad_index;
}
size_t SparseFtrlOptimInfo::indices_index() {
size_t ps_indices_index = kSparseFtrlPSSendIdx.at("indices");
return ps_indices_index;
}
} // namespace ps
} // namespace mindspore
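The subtle part of UpdateOptimInputValue above is the offset arithmetic: lens holds the element count of each packed field in worker send order, and the source pointer is values.data() plus the sum of all earlier lengths. A small self-contained sketch with hypothetical lengths:
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Hypothetical packed payload: three fields of 1, 1 and 4 elements.
  std::vector<int> lens = {1, 1, 4};
  size_t ps_send_index = 2;  // update the third field
  // Element offset of the field inside the flat values buffer.
  int offset = std::accumulate(lens.begin(), lens.begin() + ps_send_index, 0, std::plus<int>());
  // Byte count copied by the memcpy_s in UpdateOptimInputValue<float>.
  size_t bytes = static_cast<size_t>(lens[ps_send_index]) * sizeof(float);
  std::cout << "offset=" << offset << " bytes=" << bytes << "\n";  // offset=2 bytes=16
  return 0;
}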

View File

@ -1,127 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
#define MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
#include <vector>
#include <string>
#include "kernel/kernel.h"
#include "ps/constants.h"
namespace mindspore {
namespace ps {
using mindspore::kernel::AddressPtr;
class OptimizerInfo {
public:
OptimizerInfo() = default;
virtual ~OptimizerInfo() = default;
virtual void Update(const Values &values, const Lengths &lengths) {}
virtual void Accumulate(const Values &values, const Lengths &lengths) = 0;
virtual void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) {}
virtual void Reset() {}
void AddWorkspace(const AddressPtr &workspace);
virtual const AddressPtr &gradient() = 0;
virtual const AddressPtr &indices() = 0;
virtual const size_t indice_size() const;
const std::vector<AddressPtr> &inputs() const;
const std::vector<AddressPtr> &workspaces() const;
const std::vector<AddressPtr> &outputs() const;
virtual bool IsSparse() const;
virtual size_t grad_index();
virtual size_t indices_index();
protected:
template <typename T>
void UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
const Lengths &lens);
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspaces_;
std::vector<AddressPtr> outputs_;
};
class DenseOptimInfo : public OptimizerInfo {
public:
DenseOptimInfo() = default;
~DenseOptimInfo() override = default;
void Accumulate(const Values &values, const Lengths &lens) override;
void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
void Reset() override;
};
class SparseOptimInfo : public OptimizerInfo {
public:
SparseOptimInfo() = default;
~SparseOptimInfo() override = default;
void Accumulate(const Values &values, const Lengths &lens) override;
void ComputeMean(const std::vector<ShapeVector> &shapes, size_t n, size_t server_num, size_t rank_id) override;
void Reset() override;
const size_t indice_size() const override;
protected:
size_t grads_offset_{0};
size_t indices_offset_{0};
bool sharded_{true};
};
class MomentumOptimInfo : public DenseOptimInfo {
public:
MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate,
const AddressPtr &gradient, const AddressPtr &momentum);
~MomentumOptimInfo() override = default;
void Update(const Values &values, const Lengths &lens) override;
const AddressPtr &gradient() override;
const AddressPtr &indices() override;
size_t grad_index() override;
};
class SparseAdamOptimInfo : public SparseOptimInfo {
public:
SparseAdamOptimInfo(const AddressPtr &weight, const AddressPtr &m, const AddressPtr &v, const AddressPtr &beta1_power,
const AddressPtr &beta2_power, const AddressPtr &learning_rate, const AddressPtr &beta1,
const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
const AddressPtr &indices, bool sharded);
~SparseAdamOptimInfo() override = default;
void Update(const Values &values, const Lengths &lens) override;
const AddressPtr &gradient() override;
const AddressPtr &indices() override;
bool IsSparse() const override;
size_t grad_index() override;
size_t indices_index() override;
};
class SparseFtrlOptimInfo : public SparseOptimInfo {
public:
SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
const AddressPtr &grad, const AddressPtr &indices, bool sharded);
~SparseFtrlOptimInfo() override = default;
const AddressPtr &gradient() override;
const AddressPtr &indices() override;
bool IsSparse() const override;
size_t grad_index() override;
size_t indices_index() override;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_H_
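The header above fixes the optimizer-info contract: dense infos sum full gradients element-wise and then average them, while sparse infos concatenate (grad, indices) pairs. A minimal self-contained sketch of the dense side, using hypothetical names rather than the classes above:

#include <cstddef>
#include <vector>

// Minimal sketch (hypothetical type, not DenseOptimInfo) of the dense
// Accumulate/ComputeMean contract: worker gradients are summed element-wise,
// then averaged over the number of contributors.
struct DenseAccumSketch {
  std::vector<float> grad;  // server-side accumulation buffer

  void Accumulate(const std::vector<float> &worker_grad) {
    for (std::size_t i = 0; i < grad.size() && i < worker_grad.size(); ++i) {
      grad[i] += worker_grad[i];
    }
  }

  void ComputeMean(std::size_t n) {
    if (n == 0) return;
    for (float &v : grad) v /= static_cast<float>(n);
  }
};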

View File

@ -1,248 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/optimizer_info_builder.h"
#include <vector>
#include <memory>
#include <functional>
#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"
namespace mindspore {
namespace ps {
using mindspore::kernel::ps::SparseApplyFtrlPSKernelMod;
OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel> &pserver_kernel,
const WeightPtr &weight, const Keys &keys, const Values &values,
const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
bool sharded) {
MS_EXCEPTION_IF_NULL(pserver_kernel);
MS_EXCEPTION_IF_NULL(weight);
MS_EXCEPTION_IF_NULL(inputs_shape);
OptimizerInfo *optim_info =
BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel, sharded);
MS_EXCEPTION_IF_NULL(optim_info);
std::vector<size_t> ws_sizes = pserver_kernel->workspace_sizes();
BuildWorkspaces(optim_info, ws_sizes, worker_num);
BuildOutputs(optim_info, worker_num);
return optim_info;
}
void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t) {
MS_EXCEPTION_IF_NULL(info);
for (size_t i = 0; i < ws_sizes.size(); i++) {
size_t size = ws_sizes[i];
AddressPtr workspace = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(workspace);
workspace->addr = new float[size];
MS_EXCEPTION_IF_NULL(workspace->addr);
workspace->size = size;
info->AddWorkspace(workspace);
}
}
template <typename T>
AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type, const std::string &input_name,
void *ps_data, const Lengths &ps_lens,
const InputsShapePtr &inputs_shape) {
MS_EXCEPTION_IF_NULL(ps_data);
// Note that the data type in ps_data may be inconsistent.
MS_LOG(INFO) << "Get input address pointer for optimizer:" << optim_type << ", input name:" << input_name;
AddressPtr addr_ptr = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(addr_ptr);
if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported.";
}
const OptimOriginIdx &origin_input_map = kOptimToOriginIdx.at(optim_type);
const OptimPSSendIdx &ps_send_index_map = kOptimToPSSendIdx.at(optim_type);
if (ps_send_index_map.count(input_name) == 0 || origin_input_map.count(input_name) == 0) {
MS_LOG(EXCEPTION) << "Optimizer " << optim_type << " has no input for " << input_name;
}
size_t ps_index = ps_send_index_map.at(input_name);
if (ps_index == INDEX_NOT_SEND) {
MS_LOG(EXCEPTION) << "Input " << input_name << " is not supposed to be sent to PS.";
}
size_t addr_data_size, addr_data_offset;
if (inputs_shape != nullptr) {
// addr_data_size should be calculated from inputs_shape if it is passed.
size_t origin_index = origin_input_map.at(input_name);
EXC_IF_VEC_IDX_OOB((*inputs_shape), origin_index);
MS_EXCEPTION_IF_NULL((*inputs_shape)[origin_index]);
auto shape = *((*inputs_shape)[origin_index]);
addr_data_size = SizeOf(shape) * worker_num_;
} else {
EXC_IF_VEC_IDX_OOB(ps_lens, ps_index);
addr_data_size = IntToSize(ps_lens[ps_index]);
}
addr_data_offset =
IntToSize(std::accumulate(ps_lens.begin(), ps_lens.begin() + SizeToInt(ps_index), 0, std::plus<int>()));
// The real data size is given by ps_lens, not by addr_data_size.
T *buffer = new T[addr_data_size];
addr_ptr->size = IntToSize(ps_lens[ps_index]) * sizeof(T);
addr_ptr->addr = buffer;
size_t dst_size = addr_ptr->size;
size_t src_size = addr_ptr->size;
void *dst_data = addr_ptr->addr;
void *src_data = reinterpret_cast<T *>(ps_data) + addr_data_offset;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
delete[] buffer;
buffer = nullptr;
return nullptr;
}
return addr_ptr;
}
OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
const Lengths &lens, const InputsShapePtr &, size_t,
const std::shared_ptr<PServerKernel> &, bool) {
MS_EXCEPTION_IF_NULL(weight);
AddressPtr weight_addr = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(weight_addr);
weight_addr->addr = weight->data();
weight_addr->size = weight->size() * sizeof(float);
AddressPtr accumulate = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(accumulate);
accumulate->addr = new float[weight->size()];
MS_EXCEPTION_IF_NULL(accumulate->addr);
accumulate->size = sizeof(float) * weight->size();
int64_t ret = memset_s(accumulate->addr, accumulate->size, 0x00, accumulate->size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
delete[] reinterpret_cast<float *>(accumulate->addr);
accumulate->addr = nullptr;
return nullptr;
}
AddressPtr learning_rate = GenInputAddrPtr<float>(kApplyMomentum, "lr", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(learning_rate);
AddressPtr gradient = GenInputAddrPtr<float>(kApplyMomentum, "grad", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(gradient);
AddressPtr momentum = GenInputAddrPtr<float>(kApplyMomentum, "momentum", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(momentum);
return new MomentumOptimInfo(weight_addr, accumulate, learning_rate, gradient, momentum);
}
OptimizerInfo *SparseAdamOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
const Lengths &lens, const InputsShapePtr &inputs_shape, size_t,
const std::shared_ptr<PServerKernel> &, bool sharded) {
MS_EXCEPTION_IF_NULL(weight);
AddressPtr weight_addr = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(weight_addr);
weight_addr->addr = weight->data();
weight_addr->size = weight->size() * sizeof(float);
AddressPtr m = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(m);
m->addr = new float[weight->size()];
MS_EXCEPTION_IF_NULL(m->addr);
m->size = weight->size() * sizeof(float);
errno_t ret = memset_s(m->addr, m->size, 0x00, m->size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
delete[] reinterpret_cast<float *>(m->addr);
m->addr = nullptr;
return nullptr;
}
AddressPtr v = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(v);
v->addr = new float[weight->size()];
MS_EXCEPTION_IF_NULL(v->addr);
v->size = weight->size() * sizeof(float);
ret = memset_s(v->addr, v->size, 0x00, v->size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
delete[] reinterpret_cast<float *>(v->addr);
v->addr = nullptr;
delete[] reinterpret_cast<float *>(m->addr);
m->addr = nullptr;
return nullptr;
}
AddressPtr beta1_power = GenInputAddrPtr<float>(kSparseAdam, "beta1_power", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(beta1_power);
AddressPtr beta2_power = GenInputAddrPtr<float>(kSparseAdam, "beta2_power", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(beta2_power);
AddressPtr learning_rate = GenInputAddrPtr<float>(kSparseAdam, "lr", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(learning_rate);
AddressPtr beta1 = GenInputAddrPtr<float>(kSparseAdam, "beta1", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(beta1);
AddressPtr beta2 = GenInputAddrPtr<float>(kSparseAdam, "beta2", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(beta2);
AddressPtr epsilon = GenInputAddrPtr<float>(kSparseAdam, "eps", const_cast<float *>(values.data()), lens);
MS_EXCEPTION_IF_NULL(epsilon);
AddressPtr grad = GenInputAddrPtr<float>(kSparseAdam, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
MS_EXCEPTION_IF_NULL(grad);
AddressPtr indices =
GenInputAddrPtr<float>(kSparseAdam, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
MS_EXCEPTION_IF_NULL(indices);
return new SparseAdamOptimInfo(weight_addr, m, v, beta1_power, beta2_power, learning_rate, beta1, beta2, epsilon,
grad, indices, sharded);
}
OptimizerInfo *SparseFtrlOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
const Lengths &lens, const InputsShapePtr &inputs_shape, size_t,
const std::shared_ptr<PServerKernel> &pserver_kernel,
bool sharded) {
MS_EXCEPTION_IF_NULL(inputs_shape);
MS_EXCEPTION_IF_NULL(weight);
AddressPtr weight_addr = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(weight_addr);
weight_addr->addr = weight->data();
weight_addr->size = weight->size() * sizeof(float);
AddressPtr accum = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(accum);
accum->addr = new float[weight->size()];
MS_EXCEPTION_IF_NULL(accum->addr);
accum->size = weight->size() * sizeof(float);
auto ftrl_kernel = std::dynamic_pointer_cast<SparseApplyFtrlPSKernelMod>(pserver_kernel);
MS_EXCEPTION_IF_NULL(ftrl_kernel);
float *accum_data = reinterpret_cast<float *>(accum->addr);
const float init_accum = ftrl_kernel->init_accum();
for (size_t i = 0; i < weight->size(); i++) {
accum_data[i] = init_accum;
}
AddressPtr linear = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(linear);
linear->addr = new float[weight->size()];
MS_EXCEPTION_IF_NULL(linear->addr);
linear->size = weight->size() * sizeof(float);
errno_t ret = memset_s(linear->addr, weight->size() * sizeof(float), 0x00, weight->size() * sizeof(float));
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memset_s error, errorno(" << ret << ")";
delete[] reinterpret_cast<float *>(linear->addr);
linear->addr = nullptr;
return nullptr;
}
AddressPtr grad = GenInputAddrPtr<float>(kSparseFtrl, "grad", const_cast<float *>(values.data()), lens, inputs_shape);
MS_EXCEPTION_IF_NULL(grad);
AddressPtr indices =
GenInputAddrPtr<float>(kSparseFtrl, "indices", const_cast<float *>(values.data()), lens, inputs_shape);
MS_EXCEPTION_IF_NULL(indices);
return new SparseFtrlOptimInfo(weight_addr, accum, linear, grad, indices, sharded);
}
} // namespace ps
} // namespace mindspore
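GenInputAddrPtr above slices one logical input out of the flat values buffer a worker sends, using ps_lens to locate it. A self-contained sketch of that offset arithmetic; SliceInput is a hypothetical helper, not part of the original file:

#include <cstddef>
#include <cstring>
#include <numeric>
#include <vector>

// A worker sends one flat values buffer; ps_lens records each logical
// input's length in send order, so input ps_index starts at the sum of
// the preceding lengths.
std::vector<float> SliceInput(const std::vector<float> &values,
                              const std::vector<int> &ps_lens, std::size_t ps_index) {
  std::size_t offset = static_cast<std::size_t>(std::accumulate(
      ps_lens.begin(), ps_lens.begin() + static_cast<std::ptrdiff_t>(ps_index), 0));
  std::size_t len = static_cast<std::size_t>(ps_lens[ps_index]);
  std::vector<float> out(len);
  std::memcpy(out.data(), values.data() + offset, len * sizeof(float));
  return out;
}
// e.g. ps_lens = {1, 1, 6}: ps_index 2 starts at float offset 2 and spans 6 floats.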

View File

@ -1,83 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_
#define MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_
#include <vector>
#include <memory>
#include <string>
#include "kernel/kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "ps/optimizer_info.h"
namespace mindspore {
namespace ps {
using mindspore::kernel::KernelMod;
using mindspore::kernel::ps::PServerKernel;
class OptimizerInfoBuilder {
public:
explicit OptimizerInfoBuilder(size_t worker_num) : worker_num_(worker_num) {}
virtual ~OptimizerInfoBuilder() = default;
OptimizerInfo *Build(const std::shared_ptr<PServerKernel> &pserver_kernel, const WeightPtr &weight, const Keys &keys,
const Values &values, const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
bool sharded);
virtual OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values,
const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) = 0;
virtual void BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t worker_num);
virtual void BuildOutputs(OptimizerInfo *info, size_t worker_num) {}
protected:
template <typename T>
AddressPtr GenInputAddrPtr(const std::string &optim_type, const std::string &input_name, void *ps_data,
const Lengths &lens, const InputsShapePtr &inputs_shape = nullptr);
size_t worker_num_;
};
class MomentumOptimInfoBuilder : public OptimizerInfoBuilder {
public:
explicit MomentumOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
~MomentumOptimInfoBuilder() = default;
OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
const InputsShapePtr &inputs_shape, size_t worker_num,
const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};
class SparseAdamOptimInfoBuilder : public OptimizerInfoBuilder {
public:
explicit SparseAdamOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
~SparseAdamOptimInfoBuilder() = default;
OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
const InputsShapePtr &inputs_shape, size_t worker_num,
const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};
class SparseFtrlOptimInfoBuilder : public OptimizerInfoBuilder {
public:
explicit SparseFtrlOptimInfoBuilder(size_t worker_num) : OptimizerInfoBuilder(worker_num) {}
~SparseFtrlOptimInfoBuilder() = default;
OptimizerInfo *BuildInputs(const WeightPtr &weight, const Keys &keys, const Values &values, const Lengths &lens,
const InputsShapePtr &inputs_shape, size_t worker_num,
const std::shared_ptr<PServerKernel> &pserver_kernel, bool sharded) override;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_OPTIMIZER_INFO_BUILDER_H_
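The builders above follow the template-method pattern: Build fixes the call order while the virtual BuildInputs/BuildWorkspaces/BuildOutputs hooks supply the optimizer-specific pieces. A minimal sketch of that pattern with hypothetical types:

#include <memory>
#include <vector>

// Template-method sketch: the base class fixes the build order and
// subclasses supply the optimizer-specific input layout.
struct InfoSketch {
  std::vector<float> inputs;
  std::vector<float> workspaces;
};

class BuilderSketch {
 public:
  virtual ~BuilderSketch() = default;
  std::unique_ptr<InfoSketch> Build() {
    auto info = std::make_unique<InfoSketch>();
    BuildInputs(info.get());      // optimizer-specific hook
    BuildWorkspaces(info.get());  // shared default below
    return info;
  }

 protected:
  virtual void BuildInputs(InfoSketch *info) = 0;
  virtual void BuildWorkspaces(InfoSketch *info) { info->workspaces.assign(4, 0.0f); }
};

class MomentumBuilderSketch : public BuilderSketch {
 protected:
  void BuildInputs(InfoSketch *info) override { info->inputs = {0.1f /* lr */}; }
};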

File diff suppressed because it is too large

View File

@ -1,244 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
#define MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
#include <unistd.h>
#include <string>
#include <iostream>
#include <memory>
#include <vector>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <cmath>
#include <random>
#include <utility>
#include <list>
#include <map>
#include <functional>
#include <algorithm>
#include "utils/hash_map.h"
#include "ir/func_graph.h"
#include "backend/common/session/session_basic.h"
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "backend/common/session/session_factory.h"
#include "ps/optimizer_info.h"
#include "ps/optimizer_info_builder.h"
#include "ps/ps_context.h"
#include "plugin/device/cpu/hal/device/kernel_select_cpu.h"
#include "utils/ms_context.h"
#include "kernel/kernel.h"
#include "plugin/device/cpu/kernel/ps/pserver_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_adam_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_lazy_adam_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/sparse_apply_ftrl_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/apply_momentum_ps_kernel.h"
#include "plugin/device/cpu/kernel/ps/embedding_look_up_ps_kernel.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/random_normal/random_normal.h"
#include "distributed/persistent/data.h"
#include "ps/constants.h"
#include "ps/util.h"
#include "ps/embedding_table_shard_metadata.h"
#include "utils/log_adapter.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/core/ps_server_node.h"
#include "ps/core/node.h"
#include "include/backend/visible.h"
namespace mindspore {
namespace ps {
class BACKEND_EXPORT ParameterServer {
public:
static ParameterServer &GetInstance();
void Run(const FuncGraphPtr &func_graph);
private:
ParameterServer()
: pserver_num_(0),
worker_num_(0),
grad_accum_count_(0),
handler_(nullptr),
func_graph_(nullptr),
sess_(nullptr),
running_(true),
thread_(nullptr),
persist_thread_(nullptr),
server_node_(nullptr) {}
~ParameterServer() = default;
ParameterServer(const ParameterServer &) = delete;
ParameterServer &operator=(const ParameterServer &) = delete;
class ServerHandler {
public:
explicit ServerHandler(ParameterServer *ps) : ps_(ps) {}
~ServerHandler() = default;
void Init();
void operator()(const std::shared_ptr<core::TcpConnection> &conn, const std::shared_ptr<core::MessageMeta> &meta,
const void *data, size_t size);
void HandlePushReq(const void *data, size_t size, const VectorPtr &res);
void HandlePullReq(const void *data, size_t size, const VectorPtr &res);
void HandleInitWeights(const void *data, size_t size, const VectorPtr &res);
void HandleInitWeightToOptimId(const void *data, size_t size, const VectorPtr &res);
void HandleInitInputsShape(const void *data, size_t size, const VectorPtr &res);
void HandleInitEmbeddings(const void *data, size_t size, const VectorPtr &res);
void HandleCheckReadyForPush(const void *data, size_t size, const VectorPtr &res);
void HandleCheckReadyForPull(const void *data, size_t size, const VectorPtr &res);
void HandleEmbeddingLookup(const void *data, size_t size, const VectorPtr &res);
void HandleUpdateEmbeddings(const void *data, size_t size, const VectorPtr &res);
void HandleFinalize(const void *data, size_t size, const VectorPtr &res);
private:
ParameterServer *ps_;
typedef void (ServerHandler::*RequestHandler)(const void *data, size_t size, const VectorPtr &res);
mindspore::HashMap<int, RequestHandler> handlers_;
mindspore::HashMap<int, std::string> commands_;
mindspore::HashMap<Key, bool> init_weights_;
mindspore::HashMap<Key, bool> init_weight_to_optim_;
mindspore::HashMap<Key, bool> init_optim_info_;
};
// For disaster recovery: customize the key-value structures that need to be persisted and the
// business-layer recovery logic.
class RecoverHandler {
public:
explicit RecoverHandler(ParameterServer *ps) : ps_(ps) {}
~RecoverHandler() = default;
// Initialize the storage module; file storage is currently used.
void Init();
// Do disaster recovery.
void Recover();
core::FileConfiguration *config_storage() const { return storage_.get(); }
private:
// Load embedding information from persistent storage to recover the embedding table.
void RecoverEmbedding();
ParameterServer *ps_;
typedef void (RecoverHandler::*RecoverFunc)();
mindspore::HashMap<std::string, RecoverFunc> handlers_;
std::unique_ptr<core::FileConfiguration> storage_{nullptr};
};
bool Init(const FuncGraphPtr &func_graph);
void InitOptimInfoBuilders();
void InitWeightKeyToOptims(const Key &key, const int64_t &optim_id);
void InitOptimInputsShape(const Keys &keys, const Values &values, const Lengths &lengths);
void InitWeight(const Key &key, const WeightPtr &weight);
void InitGrad(const Key &key, const GradPtr &grad);
void InitEmbeddingTable(const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
const ParamInitInfo &param_init_info);
bool HasWeight(const Key &key);
void Finalize();
void UpdateWeights();
void AccumGrad(const Keys &key, const Values &values, const Lengths &lengths);
WeightPtr weight(const Key &key);
void DoEmbeddingLookup(Key key, const LookupIds &lookup_ids, KVMessage *res);
void UpdateEmbeddings(const Key &key, const LookupIds &lookup_ids, const Values &vals);
inline bool ReadyForUpdateWeights() const;
inline bool ReadyForPush(const Key &key);
inline bool ReadyForPull(const Key &key);
inline void ResetGradAccumCount();
const CNodePtr GetCNode(const std::string &name) const;
inline std::mutex &mutex();
void GetEmbeddingTableParamPtr();
void SyncEmbeddingTables();
// Cache embedding table parameters in a map; key: parameter name, value: parameter node pointer.
void CacheEmbeddingTableParamPtr();
// Whether enable disaster recovery.
bool EnableRecovery() const;
// Persist weights periodically, triggered by the scheduler.
void PersistParameters();
// Persist sparse network operators when receiving the init-embedding-table message.
void PersistKernels(const Key &key, const std::shared_ptr<std::vector<std::shared_ptr<ShapeVector>>> &shapes,
const ParamInitInfo &param_init_info) const;
// Persist parameters stored in the parameter server when receiving the init message.
void PersistInitParameters(const Key &key, const WeightPtr &param);
// Restore sparse network operators and parameters.
void RecoverEmbedding(const std::vector<Key> &keys, const std::vector<std::vector<ShapeVector>> &shapes_list,
const std::vector<std::string> &param_names);
// Restore sparse network operators.
void RecoverKernels(const std::vector<Key> &keys, const std::vector<std::vector<ShapeVector>> &shapes_list,
const std::vector<std::string> &param_names);
// Restore parameters stored in the parameter server.
void RecoverParameters(const std::vector<Key> &keys);
// Update the indices of the modified part of the persistent parameter.
void UpdateDirtyInfo(const Key &key, const LookupIds &lookup_ids, int64_t offset);
// Set the current persistent state on the server node.
void set_persistent_state(core::PersistentState persistent_state) const;
std::unique_ptr<RecoverHandler> recover_handler_;
std::atomic_bool finish_recovery_{false};
size_t pserver_num_;
size_t worker_num_;
size_t grad_accum_count_;
std::unique_ptr<ServerHandler> handler_;
FuncGraphPtr func_graph_;
std::shared_ptr<session::SessionBasic> sess_;
bool running_;
bool embedding_param_ptr_cached_{false};
// Used to cache embedding table parameters; key: parameter name, value: parameter node pointer.
mindspore::HashMap<std::string, ParameterPtr> embedding_parameter_tables_;
// Used to cache the modified part of the parameter.
mindspore::HashMap<Key, distributed::storage::DirtyInfo> weights_dirty_info_;
mindspore::HashMap<Key, std::shared_ptr<PServerKernel>> optimizers_;
mindspore::HashMap<Key, InputsShapePtr> optim_inputs_shape_;
mindspore::HashMap<Key, InputsShapePtr> original_optim_inputs_shape_;
mindspore::HashMap<Key, std::shared_ptr<OptimizerInfo>> optim_infos_;
mindspore::HashMap<std::string, std::shared_ptr<OptimizerInfoBuilder>> optim_info_builders_;
mindspore::HashMap<Key, std::string> weight_key_to_optims_;
mindspore::HashMap<Key, std::string> weight_key_to_optim_op_;
mindspore::HashMap<Key, WeightPtr> weights_;
mindspore::HashMap<Key, bool> is_embedding_;
mindspore::HashMap<Key, GradPtr> grads_;
mindspore::HashMap<Key, size_t> grads_accum_counter_;
mindspore::HashMap<Key, std::shared_ptr<PServerKernel>> embedding_lookup_ops_;
mindspore::HashMap<Key, uint64_t> tokens_;
std::mutex mutex_;
std::condition_variable apply_grads_cv_;
std::mutex access_weight_mutex_;
std::unique_ptr<std::thread> thread_;
std::unique_ptr<std::thread> persist_thread_;
std::shared_ptr<core::PSServerNode> server_node_;
std::map<Key, ParameterPtr> embedding_tables_;
friend class ServerHandler;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_PARAMETER_SERVER_H_
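The server above gates weight updates on gradient accumulation: pushes from all workers are summed before the optimizer runs, and pulls block until the new weight is ready. A toy model of that barrier, under assumed semantics (not the real ParameterServer):

#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <vector>

// Toy model of the push/update/pull barrier implied by grad_accum_count_
// and apply_grads_cv_ above; assumed semantics, not the real server.
class ToyServer {
 public:
  ToyServer(std::size_t worker_num, std::size_t dim)
      : worker_num_(worker_num), weight_(dim, 0.0f), grad_sum_(dim, 0.0f) {}

  // Each worker pushes its gradient; the last push triggers the update.
  void Push(const std::vector<float> &grad) {
    std::unique_lock<std::mutex> lock(mu_);
    for (std::size_t i = 0; i < grad_sum_.size() && i < grad.size(); ++i) {
      grad_sum_[i] += grad[i];
    }
    if (++accum_count_ == worker_num_) {
      const float lr = 0.1f;  // assumed fixed learning rate for the sketch
      for (std::size_t i = 0; i < weight_.size(); ++i) {
        weight_[i] -= lr * grad_sum_[i] / static_cast<float>(worker_num_);
        grad_sum_[i] = 0.0f;
      }
      accum_count_ = 0;
      ++version_;
      cv_.notify_all();
    }
  }

  // Pull blocks until an update newer than version_seen has been applied.
  std::vector<float> Pull(std::size_t version_seen) {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [&] { return version_ > version_seen; });
    return weight_;
  }

 private:
  std::size_t worker_num_;
  std::size_t accum_count_{0};
  std::size_t version_{0};
  std::vector<float> weight_;
  std::vector<float> grad_sum_;
  std::mutex mu_;
  std::condition_variable cv_;
};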

View File

@ -1,108 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/ps_cache/embedding_hash_map.h"
namespace mindspore {
namespace ps {
int EmbeddingHashMap::ParseData(const int id, int *const swap_out_index, int *const swap_out_ids,
const size_t data_step, const size_t graph_running_step, size_t *const swap_out_size,
bool *const need_wait_graph) {
MS_EXCEPTION_IF_NULL(swap_out_index);
MS_EXCEPTION_IF_NULL(swap_out_ids);
MS_EXCEPTION_IF_NULL(swap_out_size);
bool need_swap = false;
auto hash_index = FindInsertionPos(data_step, graph_running_step, &need_swap, need_wait_graph);
if (hash_index == INVALID_INDEX_VALUE) {
return hash_index;
}
if (!need_swap) {
hash_count_++;
(void)hash_id_to_index_.emplace(id, hash_index);
hash_map_elements_[hash_index].set_id(id);
hash_map_elements_[hash_index].set_step(data_step);
return hash_index;
}
swap_out_index[*swap_out_size] = hash_index;
swap_out_ids[*swap_out_size] = hash_map_elements_[hash_index].id_;
(*swap_out_size)++;
(void)hash_id_to_index_.erase(hash_map_elements_[hash_index].id_);
(void)hash_id_to_index_.emplace(id, hash_index);
hash_map_elements_[hash_index].set_id(id);
hash_map_elements_[hash_index].set_step(data_step);
return hash_index;
}
int EmbeddingHashMap::FindInsertionPos(const size_t, const size_t graph_running_step, bool *const need_swap,
bool *const need_wait_graph) {
MS_EXCEPTION_IF_NULL(need_swap);
MS_EXCEPTION_IF_NULL(need_wait_graph);
int hash_index = INVALID_INDEX_VALUE;
while (!expired_element_full_) {
if (hash_map_elements_[current_pos_].IsEmpty()) {
hash_index = current_pos_;
hash_count_++;
} else if (hash_map_elements_[current_pos_].IsExpired(graph_running_step)) {
hash_index = current_pos_;
*need_swap = true;
} else if (hash_map_elements_[current_pos_].IsStep(graph_running_step)) {
graph_running_index_[graph_running_index_num_++] = current_pos_;
}
current_pos_ = (current_pos_ + 1) % hash_capacity_;
if (hash_index != INVALID_INDEX_VALUE) {
return hash_index;
}
if (current_pos_ == current_batch_start_pos_) {
expired_element_full_ = true;
MS_LOG(INFO) << "Running step:" << graph_running_step << "(num:" << graph_running_index_num_
<< ") will be used, index swap will wait until the graph completed.";
}
}
if (graph_running_index_pos_ != graph_running_index_num_) {
*need_swap = true;
*need_wait_graph = true;
return graph_running_index_[graph_running_index_pos_++];
}
return INVALID_INDEX_VALUE;
}
void EmbeddingHashMap::DumpHashMap() {
MS_LOG(INFO) << "Dump hash map info begin, hash_capacity: " << hash_capacity_ << " hash_count: " << hash_count_;
MS_LOG(INFO) << "Dump hash_id_to_index: ";
for (auto iter = hash_id_to_index_.begin(); iter != hash_id_to_index_.end(); ++iter) {
MS_LOG(INFO) << " id: " << iter->first << " index: " << iter->second;
}
MS_LOG(INFO) << "Dump hash_map_unit: ";
for (size_t i = 0; i < hash_map_elements_.size(); i++) {
if (!hash_map_elements_[i].IsEmpty()) {
MS_LOG(INFO) << " index: " << i << " id: " << hash_map_elements_[i].id_
<< " step: " << hash_map_elements_[i].step_;
}
}
MS_LOG(INFO) << "Dump hash map info end.";
}
void EmbeddingHashMap::Reset() {
current_batch_start_pos_ = current_pos_;
graph_running_index_num_ = 0;
graph_running_index_pos_ = 0;
expired_element_full_ = false;
}
} // namespace ps
} // namespace mindspore
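FindInsertionPos above scans the table circularly for an empty slot, then for an expired one (untouched since before the running graph step), swapping out the evicted id. A simplified model of that policy, omitting the circular scan pointer and the wait-on-running-graph path:

#include <cstddef>
#include <unordered_map>
#include <vector>

// Simplified model of the insertion policy: reuse an empty slot first,
// otherwise evict a slot whose step predates the running graph step.
struct Slot {
  int id = -1;
  std::size_t step = 0;  // 0 means empty, mirroring INVALID_STEP_VALUE
};

int Insert(std::vector<Slot> *slots, std::unordered_map<int, int> *id_to_index,
           int id, std::size_t data_step, std::size_t running_step) {
  for (std::size_t k = 0; k < slots->size(); ++k) {
    Slot &s = (*slots)[k];
    const bool empty = (s.step == 0);
    const bool expired = !empty && (running_step > s.step);
    if (empty || expired) {
      if (expired) {
        id_to_index->erase(s.id);  // the evicted id must be swapped out
      }
      s.id = id;
      s.step = data_step;
      (*id_to_index)[id] = static_cast<int>(k);
      return static_cast<int>(k);
    }
  }
  return -1;  // table full: the caller must wait for the running graph
}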

View File

@ -1,89 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_
#define MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_
#include <cmath>
#include <utility>
#include <memory>
#include <vector>
#include "utils/hash_map.h"
#include "utils/convert_utils_base.h"
namespace mindspore {
namespace ps {
static const size_t INVALID_STEP_VALUE = 0;
static const int INVALID_INDEX_VALUE = -1;
struct HashMapElement {
int id_{INVALID_INDEX_VALUE};
size_t step_{INVALID_STEP_VALUE};
bool IsEmpty() const { return step_ == INVALID_STEP_VALUE; }
bool IsExpired(size_t graph_running_step) const { return graph_running_step > step_; }
bool IsStep(size_t step) const { return step_ == step; }
void set_id(int id) { id_ = id; }
void set_step(size_t step) { step_ = step; }
};
// The hash table is held on the device; EmbeddingHashMap manages it on the host.
class EmbeddingHashMap {
public:
EmbeddingHashMap(size_t hash_count, size_t hash_capacity)
: hash_count_(hash_count),
hash_capacity_(hash_capacity),
current_pos_(0),
current_batch_start_pos_(0),
graph_running_index_num_(0),
graph_running_index_pos_(0),
expired_element_full_(false) {
hash_map_elements_.resize(hash_capacity);
// In multi-device mode, embedding tables are distributed across devices by ID interval;
// IDs outside the local device's range map to the first and last positions of the table,
// which are reserved for this purpose.
hash_map_elements_.front().set_step(SIZE_MAX);
hash_map_elements_.back().set_step(SIZE_MAX);
graph_running_index_ = std::make_unique<int[]>(hash_capacity);
}
virtual ~EmbeddingHashMap() = default;
int ParseData(const int id, int *const swap_out_index, int *const swap_out_ids, const size_t data_step,
const size_t graph_running_step, size_t *const swap_out_size, bool *const need_wait_graph);
size_t hash_step(const int hash_index) const { return hash_map_elements_[IntToSize(hash_index)].step_; }
void set_hash_step(const int hash_index, const size_t step) {
hash_map_elements_[IntToSize(hash_index)].set_step(step);
}
const mindspore::HashMap<int, int> &hash_id_to_index() const { return hash_id_to_index_; }
size_t hash_capacity() const { return hash_capacity_; }
void DumpHashMap();
void Reset();
private:
int FindInsertionPos(const size_t data_step, const size_t graph_running_step, bool *const need_swap,
bool *const need_wait_graph);
size_t hash_count_;
size_t hash_capacity_;
std::vector<HashMapElement> hash_map_elements_;
mindspore::HashMap<int, int> hash_id_to_index_;
size_t current_pos_;
size_t current_batch_start_pos_;
size_t graph_running_index_num_;
size_t graph_running_index_pos_;
std::unique_ptr<int[]> graph_running_index_;
bool expired_element_full_;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_EMBEDDING_HASH_MAP_H_

View File

@ -1,47 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H
#include <utility>
#include <memory>
namespace mindspore {
namespace ps {
class PsCacheBasic {
public:
PsCacheBasic() = default;
virtual ~PsCacheBasic() = default;
virtual bool InitDevice(uint32_t device_id, const void *context) = 0;
virtual void *MallocMemory(size_t size) = 0;
virtual bool MallocConstantMemory(size_t) { return true; }
virtual void FreeMemory(void *buf) = 0;
virtual bool RecordEvent() = 0;
virtual bool SynchronizeEvent() = 0;
virtual bool SynchronizeStream() = 0;
virtual bool CopyHostMemToDevice(void *dst, const void *src, size_t size) = 0;
virtual bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) = 0;
virtual bool HashSwapOut(void *hash_table_addr, void *swap_out_value_addr, void *swap_out_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_out_size) = 0;
virtual bool HashSwapIn(void *hash_table_addr, void *swap_in_value_addr, void *swap_in_index_addr,
size_t cache_vocab_size, size_t embedding_size, size_t swap_in_size) = 0;
protected:
void *stream_{nullptr};
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_BASIC_H
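PsCacheBasic defines the device-side contract the cache manager programs against. A hypothetical host-backed stand-in that satisfies the memory and copy parts of the interface (the hash-swap methods are omitted here); a real backend would drive device streams and events instead of plain memcpy:

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Hypothetical host-backed stand-in for the PsCacheBasic contract,
// shown only to illustrate the interface.
class HostPsCacheSketch {
 public:
  bool InitDevice(uint32_t /*device_id*/, const void * /*context*/) { return true; }
  void *MallocMemory(size_t size) { return std::malloc(size); }
  void FreeMemory(void *buf) { std::free(buf); }
  bool RecordEvent() { return true; }
  bool SynchronizeEvent() { return true; }
  bool SynchronizeStream() { return true; }
  bool CopyHostMemToDevice(void *dst, const void *src, size_t size) {
    std::memcpy(dst, src, size);
    return true;
  }
  bool CopyDeviceMemToHost(void *dst, const void *src, size_t size) {
    std::memcpy(dst, src, size);
    return true;
  }
};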

View File

@ -1,42 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/ps_cache/ps_cache_factory.h"
#include "utils/log_adapter.h"
namespace mindspore {
namespace ps {
PsCacheFactory &PsCacheFactory::Get() {
static PsCacheFactory instance;
return instance;
}
void PsCacheFactory::Register(const std::string &device_name, PsCacheCreator &&ps_cache_creator) {
if (ps_cache_creators_.end() == ps_cache_creators_.find(device_name)) {
(void)ps_cache_creators_.emplace(device_name, ps_cache_creator);
}
}
std::shared_ptr<PsCacheBasic> PsCacheFactory::ps_cache(const std::string &device_name) {
auto iter = ps_cache_creators_.find(device_name);
if (ps_cache_creators_.end() != iter) {
MS_EXCEPTION_IF_NULL(iter->second);
return (iter->second)();
}
return nullptr;
}
} // namespace ps
} // namespace mindspore

View File

@ -1,57 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "ps/ps_cache/ps_cache_basic.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace ps {
using PsCacheCreator = std::function<std::shared_ptr<PsCacheBasic>()>;
class PsCacheFactory {
public:
static PsCacheFactory &Get();
void Register(const std::string &device_name, PsCacheCreator &&ps_cache_creator);
std::shared_ptr<PsCacheBasic> ps_cache(const std::string &device_name);
private:
PsCacheFactory() = default;
~PsCacheFactory() = default;
DISABLE_COPY_AND_ASSIGN(PsCacheFactory)
std::map<std::string, PsCacheCreator> ps_cache_creators_;
};
class PsCacheRegistrar {
public:
PsCacheRegistrar(const std::string &device_name, PsCacheCreator &&ps_cache_creator) {
PsCacheFactory::Get().Register(device_name, std::move(ps_cache_creator));
}
~PsCacheRegistrar() = default;
};
#define MS_REG_PS_CACHE(DEVICE_NAME, PS_CACHE_CLASS) \
static const PsCacheRegistrar g_ps_cache_registrar__##DEVICE_NAME##_##_reg( \
DEVICE_NAME, []() { return std::make_shared<PS_CACHE_CLASS>(); });
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_FACTORY_H_
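The factory above pairs a creator map with a static registrar so each device backend registers itself at program start via MS_REG_PS_CACHE. A self-contained sketch of the same pattern with hypothetical types:

#include <functional>
#include <map>
#include <memory>
#include <string>

// Sketch of the registration pattern: a static registrar object runs at
// program start and installs a creator keyed by device name.
struct Cache { virtual ~Cache() = default; };
struct FakeCache : Cache {};

using Creator = std::function<std::shared_ptr<Cache>()>;

std::map<std::string, Creator> &Registry() {
  static std::map<std::string, Creator> r;
  return r;
}

struct Registrar {
  Registrar(const std::string &name, Creator c) { Registry().emplace(name, std::move(c)); }
};

static const Registrar g_fake_reg("FakeDevice", [] { return std::make_shared<FakeCache>(); });

// Lookup mirrors PsCacheFactory::ps_cache: unknown device names yield nullptr.
std::shared_ptr<Cache> MakeCache(const std::string &name) {
  auto it = Registry().find(name);
  return it == Registry().end() ? nullptr : it->second();
}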

File diff suppressed because it is too large

View File

@ -1,218 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_
#define MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_
#include <map>
#include <string>
#include <vector>
#include <thread>
#include <atomic>
#include <utility>
#include <memory>
#include <condition_variable>
#include "utils/ms_context.h"
#include "kernel/kernel.h"
#include "utils/shape_utils.h"
#include "ir/tensor.h"
#include "ps/constants.h"
#include "ps/worker.h"
#include "ps/ps_context.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/ps_cache/embedding_hash_map.h"
#include "ps/ps_cache/ps_cache_factory.h"
#include "include/backend/visible.h"
namespace mindspore {
namespace ps {
constexpr size_t kHostCacheScaleFactor = 10;
constexpr size_t kMaxThreadNum = 16;
constexpr size_t kMaxIdsPerThread = 10000;
using mindspore::kernel::Address;
struct HashTableInfo {
size_t cache_vocab_size{0};
size_t host_cache_vocab_size{0};
size_t embedding_size{0};
size_t vocab_size{0};
Address device_address{nullptr, 0};
std::shared_ptr<float> host_address{nullptr};
ParamInitInfo param_init_info_;
int32_t param_key_{-1};
};
struct EmbeddingDeviceCache {
EmbeddingDeviceCache(size_t batch_elements, size_t cache_vocab_size)
: hash_swap_index_addr_(nullptr), hash_swap_value_addr_(nullptr) {
device_to_host_index = std::make_unique<int[]>(batch_elements);
device_to_host_ids = std::make_unique<int[]>(batch_elements);
host_to_device_index = std::make_unique<int[]>(batch_elements);
host_to_device_ids = std::make_unique<int[]>(batch_elements);
device_hash_map_ = std::make_shared<EmbeddingHashMap>(0, cache_vocab_size);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
cache_ = PsCacheFactory::Get().ps_cache(device_target);
}
std::unique_ptr<int[]> device_to_host_index;
std::unique_ptr<int[]> device_to_host_ids;
std::unique_ptr<int[]> host_to_device_index;
std::unique_ptr<int[]> host_to_device_ids;
int *hash_swap_index_addr_;
float *hash_swap_value_addr_;
std::shared_ptr<EmbeddingHashMap> device_hash_map_;
std::shared_ptr<PsCacheBasic> cache_;
};
struct EmbeddingHostCache {
EmbeddingHostCache(size_t batch_elements, size_t host_cache_vocab_size) {
host_to_server_index = std::make_unique<int[]>(batch_elements);
host_to_server_ids = std::make_unique<int[]>(batch_elements);
server_to_host_index = std::make_unique<int[]>(batch_elements);
server_to_host_ids = std::make_unique<int[]>(batch_elements);
host_to_device_index = std::make_unique<int[]>(batch_elements);
device_to_host_index = std::make_unique<int[]>(batch_elements);
host_hash_map_ = std::make_shared<EmbeddingHashMap>(0, host_cache_vocab_size);
}
std::unique_ptr<int[]> host_to_server_index;
std::unique_ptr<int[]> host_to_server_ids;
std::unique_ptr<int[]> server_to_host_index;
std::unique_ptr<int[]> server_to_host_ids;
std::unique_ptr<int[]> host_to_device_index;
std::unique_ptr<int[]> device_to_host_index;
std::shared_ptr<EmbeddingHashMap> host_hash_map_;
};
struct PsCacheStatisticsInfo {
size_t batch_id_count_{0};
size_t batch_id_unique_count_{0};
size_t device_to_host_size_{0};
size_t host_to_device_size_{0};
size_t host_to_server_size_{0};
size_t server_to_host_size_{0};
size_t hash_hit_count_{0};
size_t mem_cache_swap_out_size_{0};
size_t mem_cache_swap_in_size_{0};
size_t mem_cache_hit_count_{0};
};
class BACKEND_EXPORT PsCacheManager {
public:
static PsCacheManager &GetInstance();
void Initialize();
void InsertHashTableSize(const std::string &param_name, size_t cache_vocab_size, size_t embedding_size,
size_t vocab_size);
void InsertWeightInitInfo(const std::string &param_name, size_t global_seed, size_t op_seed);
void InsertAccumuInitInfo(const std::string &param_name, float init_val);
void ReInsertHashTableSize(const std::string &new_param_name, const std::string &cur_param_name,
size_t cache_vocab_size, size_t embedding_size);
void CloneHashTable(const std::string &dest_param_name, const std::string &src_param_name);
const Address &QueryHashTableAddr(const std::string &param_name) const;
const size_t &QueryHashTableSize(const std::string &param_name) const;
bool IsHashTable(const std::string &param_name) { return hash_tables_.count(param_name) != 0; }
void set_batch_elements(size_t batch_elements) { batch_elements_ = batch_elements; }
void set_rank_id(uint32_t rank_id) { rank_id_ = rank_id; }
bool initialized_ps_cache() const { return initialized_ps_cache_; }
size_t vocab_cache_size() const { return vocab_cache_size_; }
int cache_indices_lower_bound() const;
void DoProcessData(uint32_t device_id, const void *context);
void IncreaseGraphStep(const std::string &channel_name);
void SyncEmbeddingTable();
void Finalize();
void DumpHashTables(bool dump_device_tables = false) const;
private:
PsCacheManager() = default;
~PsCacheManager() = default;
PsCacheManager(const PsCacheManager &) = delete;
PsCacheManager &operator=(const PsCacheManager &) = delete;
bool IncreaseStep();
void set_current_graph_step() { graph_running_step_ = graph_step_; }
std::string channel_name();
void set_channel_name(const std::string channel_name);
bool InitParameterServer();
void InitDataChannel();
void AllocMemForHashTable();
void SetLocalIdRank();
void ProcessDataTask(uint32_t device_id, const void *context);
bool ProcessData();
bool ParseData(const int *batch_ids, const size_t batch_ids_len, int *hash_index);
bool WaitGraphRun();
bool ParseDeviceData(size_t id, bool *need_swap_device_to_host, bool *need_swap_host_to_device, int *hash_index);
bool ParseHostDataHostToDevice(size_t id);
bool ParseHostDataDeviceToHost();
bool HashSwapDeviceOut(int *swap_out_index, std::vector<float> *swap_out_data, const HashTableInfo &hash_info);
bool HashSwapDeviceIn(const int *swap_in_ids, const int *swap_in_index, const HashTableInfo &hash_info, size_t key);
bool HashSwapHostToDevice(const HashTableInfo &hash_info);
bool HashSwapDeviceToHost(const HashTableInfo &hash_info);
bool HashSwapHostToServer(size_t key, const HashTableInfo &hash_info);
bool HashSwapServerToHost(size_t key, const HashTableInfo &hash_info);
bool InsertHostHashTable(size_t embedding_size, size_t insert_indices_size, const int *insert_indices,
const float *insert_data, float *hash_table_addr);
bool LookUpHostHashTable(size_t embedding_size, size_t indices_lens, const float *hash_table_addr,
const int *indices_addr, float *output_addr);
bool UpdataEmbeddingTable(const std::vector<float> &swap_out_data, int *const swap_out_ids, size_t key);
void LookUpTableTask(size_t indices_lens, size_t outer_dim_size, size_t first_dim_size, const float *input_addr,
const int *indices_addr, float *output_addr);
bool CheckFinishInsertInitInfo() const;
void AddEmbeddingTable() const;
void DumpStatisticsInfo(size_t each_print_step = 1000);
bool SyncHostEmbeddingTable();
bool SyncDeviceEmbeddingTable();
bool CheckCacheHitOrOutRangeTask(const int *batch_ids, const size_t batch_ids_len, int *hash_index, bool *in_device,
bool *out_range, size_t *hash_hit_count);
bool CheckCacheHitOrOutRange(const int *batch_ids, const size_t batch_ids_len, int *hash_index, bool *in_device,
bool *out_range);
bool ResetEmbeddingHashMap();
bool initialized_ps_cache_{false};
std::string channel_name_;
std::mutex channel_mutex_;
std::atomic_ulong graph_step_{0};
size_t graph_running_step_{0};
size_t data_step_{0};
std::mutex data_mutex_;
std::condition_variable data_prase_;
std::condition_variable insert_init_info_;
std::thread process_data_thread_;
std::map<std::string, HashTableInfo> hash_tables_;
std::shared_ptr<EmbeddingDeviceCache> embedding_device_cache_;
std::shared_ptr<EmbeddingHostCache> embedding_host_cache_;
size_t vocab_size_{0};
size_t vocab_cache_size_{0};
size_t host_vocab_cache_size_{0};
size_t batch_elements_{0};
PsCacheStatisticsInfo statistics_info_;
std::pair<int, int> emb_table_slice_bounds_;
std::pair<int, int> cache_indices_bounds_;
int vocab_cache_size_diff_{0};
uint32_t rank_id_{0};
std::atomic_bool finish_insert_init_info_{false};
std::atomic_bool finish_init_parameter_server_{false};
std::atomic_bool running_{false};
std::atomic_bool finalized_{false};
bool finish_embedding_table_sync_{false};
bool device_need_wait_graph_{false};
bool host_need_wait_graph_{false};
};
static PsCacheManager &ps_cache_instance = PsCacheManager::GetInstance();
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_PS_CACHE_MANAGER_H_
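PsCacheManager resolves each batch id through three tiers: the device cache, then the host cache, then the parameter server, recording a swap for every miss; that is what produces the host_to_device/server_to_host (and reverse) lists above. A compact sketch of one lookup under those assumed semantics:

#include <unordered_map>
#include <vector>

// Assumed three-tier resolution behind ParseData: device cache first, then
// host cache, then the parameter server; every miss is recorded as a swap.
struct SwapPlan {
  std::vector<int> host_to_device;
  std::vector<int> server_to_host;
};

int Lookup(int id, std::unordered_map<int, int> *device_map,
           std::unordered_map<int, int> *host_map, int *next_device_slot,
           SwapPlan *plan) {
  auto dev_it = device_map->find(id);
  if (dev_it != device_map->end()) {
    return dev_it->second;  // device hit: no data movement needed
  }
  if (host_map->find(id) == host_map->end()) {
    plan->server_to_host.push_back(id);  // host miss: fetch the row from the server
    (*host_map)[id] = static_cast<int>(host_map->size());
  }
  plan->host_to_device.push_back(id);  // device miss: stage the row onto the device
  int slot = (*next_device_slot)++;
  (*device_map)[id] = slot;
  return slot;
}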

View File

@ -21,7 +21,6 @@
#include "utils/ms_utils.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "distributed/cluster/cluster_context.h"
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "distributed/embedding_cache/embedding_cache_utils.h"
#else
@ -98,7 +97,6 @@ void PSContext::Reset() {
is_sched_ = false;
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
if (ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps_cache_instance.Finalize();
set_cache_enable(false);
}
#endif
@ -166,7 +164,6 @@ void PSContext::InsertHashTableSize(const std::string &param_name, size_t cache_
embedding_cache_table_manager.InsertHashTableSize(param_name, cache_vocab_size, embedding_size, vocab_size,
param_key);
}
ps_cache_instance.InsertHashTableSize(param_name, cache_vocab_size, embedding_size, vocab_size);
#endif
}
@ -177,21 +174,12 @@ void PSContext::ReInsertHashTableSize(const std::string &new_param_name, const s
embedding_cache_table_manager.ReInsertHashTableSize(new_param_name, cur_param_name, cache_vocab_size,
embedding_size);
}
ps_cache_instance.ReInsertHashTableSize(new_param_name, cur_param_name, cache_vocab_size, embedding_size);
#endif
}
void PSContext::InsertWeightInitInfo(const std::string &param_name, size_t global_seed, size_t op_seed) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
ps_cache_instance.InsertWeightInitInfo(param_name, global_seed, op_seed);
#endif
}
void PSContext::InsertWeightInitInfo(const std::string &, size_t, size_t) const { return; }
void PSContext::InsertAccumuInitInfo(const std::string &param_name, float init_val) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
ps_cache_instance.InsertAccumuInitInfo(param_name, init_val);
#endif
}
void PSContext::InsertAccumuInitInfo(const std::string &, float) const { return; }
void PSContext::CloneHashTable(const std::string &dest_param_name, int32_t dest_param_key,
const std::string &src_param_name, int32_t src_param_key) const {
@ -199,7 +187,6 @@ void PSContext::CloneHashTable(const std::string &dest_param_name, int32_t dest_
if (enable_distributed_mindrt()) {
embedding_cache_table_manager.CloneHashTable(dest_param_name, dest_param_key, src_param_name, src_param_key);
}
ps_cache_instance.CloneHashTable(dest_param_name, src_param_name);
#endif
}
@ -216,11 +203,7 @@ bool PSContext::cache_enable() const {
return false;
}
void PSContext::set_rank_id(uint32_t rank_id) const {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
ps_cache_instance.set_rank_id(rank_id);
#endif
}
void PSContext::set_rank_id(uint32_t) const { return; }
void PSContext::set_server_mode(const std::string &server_mode) {
if (server_mode != kServerModePS && server_mode != kServerModeFL && server_mode != kServerModeHybrid) {

View File

@ -1,48 +0,0 @@
/**
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/random_normal/random_normal.h"
#include <random>
#include "include/common/random.h"
#include "utils/log_adapter.h"
namespace mindspore::ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
float *output_data) {
// Check output data pointer.
if (output_data == nullptr) {
MS_LOG(ERROR) << "output data is null.";
return false;
}
// Check shape.
if (out_shape.size() == 0) {
MS_LOG(ERROR) << "output data shape is empty.";
return false;
}
// Calculate data size from shape.
size_t data_size = 1;
for (size_t i = 0; i < out_shape.size(); ++i) {
data_size *= out_shape[i];
}
// Generate randoms parallel.
constexpr int seed_shift = 32;
const uint64_t seed = (global_seed << seed_shift) + op_seed;
using Generator = random::Philox;
using Distribution = random::NormalDistribution<float>;
random::GenerateRandomsParallel<float, Generator, Distribution>(seed, output_data, data_size, mean, stddev);
return true;
}
} // namespace mindspore::ps
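InitRandomNormal above mixes the graph-level and op-level seeds as (global_seed << 32) + op_seed and fills one element per entry in the product of the shape dims. A sketch of that contract using a sequential standard-library generator in place of the parallel Philox fill:

#include <cstdint>
#include <random>
#include <vector>

// Sketch of InitRandomNormal's contract with a sequential generator; the
// original uses a parallel Philox-based fill instead.
std::vector<float> RandomNormalSketch(float mean, float stddev,
                                      const std::vector<std::size_t> &shape,
                                      uint64_t global_seed, uint64_t op_seed) {
  std::size_t n = 1;
  for (std::size_t d : shape) n *= d;                    // data size = product of dims
  const uint64_t seed = (global_seed << 32) + op_seed;   // same seed-mixing rule
  std::mt19937_64 gen(seed);
  std::normal_distribution<float> dist(mean, stddev);
  std::vector<float> out(n);
  for (float &v : out) v = dist(gen);
  return out;
}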

View File

@ -1,27 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_
#define MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_
#include <vector>
namespace mindspore {
namespace ps {
bool InitRandomNormal(float mean, float stddev, std::vector<size_t> out_shape, size_t global_seed, size_t op_seed,
float *output_data);
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_RANDOM_NORMAL_RANDOM_NORMAL_H_

View File

@ -1,971 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/worker.h"
#include "pipeline/jit/pipeline.h"
namespace mindspore {
namespace ps {
namespace {
constexpr int kRetryDuration = 2000;
} // namespace
Worker &Worker::GetInstance() {
static Worker instance{};
return instance;
}
void Worker::Run() {
std::lock_guard<std::mutex> lock(running_mutex_);
server_num_ = PSContext::instance()->initial_server_num();
if (running_) {
MS_LOG(INFO) << "'Worker is already running.";
return;
}
if (!PSContext::instance()->is_worker()) {
MS_LOG(EXCEPTION) << "The role is not worker.";
}
Initialize();
worker_node_.RegisterEventCallback(core::ClusterEvent::SCHEDULER_TIMEOUT, [this]() {
MS_LOG(ERROR) << "Trigger timeout event: SCHEDULER_TIMEOUT begin to exit the system!";
this->Finalize();
exit(0);
});
worker_node_.RegisterEventCallback(core::ClusterEvent::NODE_TIMEOUT, [this]() {
MS_LOG(ERROR) << "Trigger timeout event: NODE_TIMEOUT begin to exit the system!";
this->Finalize();
exit(0);
});
MS_LOG(INFO) << "Worker starts connecting to scheduler and server...";
worker_node_.Start();
MS_LOG(INFO) << "Worker connected successfully.";
running_ = true;
}
void Worker::Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const ShapeVector &sizes) {
if (keys.size() == 0) {
MS_LOG(EXCEPTION) << "key size should be greater than zero";
}
if (key_to_optimId_.count(keys[0]) == 0) {
MS_LOG(EXCEPTION) << "no optim id found for key" << keys[0];
}
Key key = keys[0];
int64_t optim_id = key_to_optimId_[key];
MS_LOG(INFO) << "The key is:" << key << " the optim_id:" << optim_id;
bool is_sparse = false;
if (optim_id == 1 || optim_id == kSparseLazyAdamIndex || optim_id == kSparseFtrlIndex) {
is_sparse = true;
}
int64_t grad_index = -1;
int64_t indice_index = -1;
// Sparse adam gradient
if (optim_id == 1 || optim_id == kSparseLazyAdamIndex) {
grad_index = kSparseGradIndex;
indice_index = kSparseIndiceIndex;
// Sparse ftrl gradient
} else if (optim_id == kSparseFtrlIndex) {
grad_index = 0;
indice_index = 1;
}
size_t total_size = LongToSize(std::accumulate(sizes.begin(), sizes.end(), int64_t(0), std::plus<int64_t>()));
std::vector<float> total_buffer(total_size, 0);
size_t offset = 0;
for (size_t i = 0; i < sizes.size(); i++) {
void *dst_data = total_buffer.data() + offset / sizeof(float);
void *src_data = reinterpret_cast<void *>(addrs[i]);
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
size_t size = sizes[i] * sizeof(float);
size_t dest_size = size;
size_t src_size = size;
errno_t ret = memcpy_s(dst_data, dest_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
offset += size;
}
MS_LOG(INFO) << "The total size is:" << total_size;
while (running_ && (!IsReadyForPush(keys[0]))) {
continue;
}
std::vector<int> sizes_int;
(void)std::transform(sizes.begin(), sizes.end(), std::back_inserter(sizes_int),
[](const int64_t &value) { return static_cast<int>(value); });
if (!is_sparse) {
PushData(std::vector<Key>(keys), total_buffer, std::vector<int>(sizes_int), kPushCmd);
} else {
std::vector<int64_t> &var_shape = key_to_optim_shapes_[key][0];
int64_t first_dim_size = var_shape[0];
int64_t outer_dim_size = std::accumulate(var_shape.begin() + 1, var_shape.end(), 1, std::multiplies<int64_t>());
MS_LOG(DEBUG) << "The keys:" << keys << " the total_buffer:" << total_buffer << " the sizes_int:" << sizes_int
<< " the grad_index:" << grad_index << " the indice_index:" << indice_index
<< " the first_dim_size:" << first_dim_size << " the outer_dim_size" << outer_dim_size;
PushSparseData(std::vector<Key>(keys), total_buffer, std::vector<int>(sizes_int), LongToSize(grad_index),
LongToSize(indice_index), LongToSize(first_dim_size), LongToSize(outer_dim_size));
}
}
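// Worked example of the packing loop in Push above: with sizes = {2, 3},
// iteration 0 copies tensor 0 into floats [0, 2) of total_buffer and
// iteration 1 copies tensor 1 into floats [2, 5); offset advances in bytes
// while the destination index uses offset / sizeof(float), so the byte and
// element bookkeeping stay in step.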
void Worker::Pull(const size_t key, void *dev_addr, const size_t size) {
MS_EXCEPTION_IF_NULL(dev_addr);
std::vector<float> variables(size / sizeof(float), 0);
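// Busy-wait until the server reports this key ready for pull.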
while (running_ && (!IsReadyForPull(key))) {
continue;
}
PullData({key}, &variables, nullptr, kPullCmd);
MS_LOG(DEBUG) << "The variables:" << variables << " the size is:" << size;
size_t dst_size = size;
size_t src_size = size;
errno_t ret = memcpy_s(dev_addr, dst_size, variables.data(), src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
}
size_t Worker::SetParamKey(const std::string &param_name) {
size_t key = UINT64_MAX;
if (param_to_key_.count(param_name)) {
key = param_to_key_[param_name];
MS_LOG(INFO) << param_name << " key is already set: key value is " << key;
} else {
key = key_cnt_++;
param_to_key_[param_name] = key;
MS_LOG(INFO) << "Set key " << key << " for parameter " << param_name;
}
return key;
}
size_t Worker::GetParamKey(const std::string &param_name) {
size_t key = kInvalidKey;
if (param_to_key_.find(param_name) != param_to_key_.end()) {
key = param_to_key_[param_name];
MS_LOG(DEBUG) << "Get key of parameter " << param_name << " key is " << key;
}
return key;
}
void Worker::SetParamInitInServer(const std::string &param_name, bool init_in_server) {
MS_LOG(DEBUG) << "Set parameter " << param_name << " init_in_server:" << init_in_server;
param_to_init_in_server_[param_name] = init_in_server;
}
bool Worker::GetParamInitInServer(const std::string &param_name) {
if (param_to_init_in_server_.count(param_name) == 0) {
return false;
}
return param_to_init_in_server_[param_name];
}
void Worker::SetKeyOptimId(size_t key, const std::string &optimizer_name) {
MS_LOG(INFO) << "SetKeyOptimId key is:" << key << " optimizer_name:" << optimizer_name;
key_to_optimId_[key] = Util::optimizer_id(optimizer_name);
}
void Worker::SetOptimInputShapes(size_t key, const ShapeVector &shape) {
if (key_to_optim_shapes_.find(key) == key_to_optim_shapes_.end()) {
key_to_optim_shapes_[key] = {shape};
} else {
key_to_optim_shapes_[key].push_back(shape);
}
}
void Worker::AddEmbeddingTable(const Key &key, const size_t &row_count) {
bool has_init = IsKeyInit(key);
if (has_init) {
return;
}
uint64_t begin = 0;
uint64_t end = 0;
for (int64_t i = 0; i < server_num_; i++) {
size_t local_row_cnt = LongToSize(Util::LocalShard(row_count, i, server_num_));
MS_LOG(DEBUG) << "The row_count:" << row_count << " the local_row_cnt:" << local_row_cnt;
if (i == 0) {
end = local_row_cnt - 1;
} else {
begin = end + 1;
end += local_row_cnt;
}
EmbeddingTableShardMetadata range(begin, end);
if (embedding_table_ranges_.count(key) == 0) {
embedding_table_ranges_[key] = std::make_shared<std::vector<EmbeddingTableShardMetadata>>();
MS_EXCEPTION_IF_NULL(embedding_table_ranges_[key]);
}
embedding_table_ranges_[key]->push_back(range);
}
embedding_row_cnt_[key] = row_count;
}
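// AddEmbeddingTable above assigns each server an inclusive, contiguous range of rows.
// A standalone sketch of that arithmetic, assuming Util::LocalShard splits row_count as
// evenly as possible with the first row_count % server_num shards taking one extra row
// (the exact formula lives in ps/util.cc) and that row_count >= server_num; ShardRows is
// an illustrative name:
#include <cstdint>
#include <utility>
#include <vector>
std::vector<std::pair<uint64_t, uint64_t>> ShardRows(uint64_t row_count, uint64_t server_num) {
  std::vector<std::pair<uint64_t, uint64_t>> ranges;
  uint64_t begin = 0;
  for (uint64_t i = 0; i < server_num; ++i) {
    uint64_t local = row_count / server_num + (i < row_count % server_num ? 1 : 0);
    ranges.emplace_back(begin, begin + local - 1);  // inclusive [begin, end]
    begin += local;
  }
  return ranges;  // e.g. 10 rows on 3 servers -> [0,3] [4,6] [7,9]
}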
bool Worker::InitPSEmbeddingTable(const size_t &key, const std::vector<size_t> &input_shape,
const std::vector<size_t> &indices_shape, const std::vector<size_t> &output_shape,
const ParamInitInfoMessage &info, uint32_t timeout) {
bool has_init = IsKeyInit(key);
if (has_init) {
MS_LOG(DEBUG) << "The key embedding table of key " << key << " is initialized.";
return true;
}
EmbeddingTableMeta embedding_table_meta;
embedding_table_meta.set_key(key);
*embedding_table_meta.mutable_input_shape() = {input_shape.begin(), input_shape.end()};
*embedding_table_meta.mutable_indices_shape() = {indices_shape.begin(), indices_shape.end()};
*embedding_table_meta.mutable_output_shape() = {output_shape.begin(), output_shape.end()};
*embedding_table_meta.mutable_info() = info;
const std::string &kv_data = embedding_table_meta.SerializeAsString();
while (!worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, kInitEmbeddingsCmd, timeout)) {
MS_LOG(INFO) << "Worker Broadcast failed!, retrying.";
if (!running_) {
MS_LOG(ERROR) << "Worker Broadcast failed!";
return false;
}
std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
}
return true;
}
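// InitPSEmbeddingTable retries the broadcast until it succeeds or the worker stops.
// The same retry-until-stopped pattern as a standalone sketch; RetryUntilStopped and
// both callables are illustrative stand-ins, not part of worker.cc:
#include <chrono>
#include <functional>
#include <thread>
bool RetryUntilStopped(const std::function<bool()> &try_send, const std::function<bool()> &still_running,
                       std::chrono::milliseconds retry_interval) {
  while (!try_send()) {
    if (!still_running()) {
      return false;  // shutting down: give up instead of retrying forever
    }
    std::this_thread::sleep_for(retry_interval);
  }
  return true;
}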
void Worker::InitPSParamAndOptim(const AnfNodePtr &input_node, const tensor::TensorPtr &tensor) {
MS_EXCEPTION_IF_NULL(tensor);
MS_EXCEPTION_IF_NULL(input_node);
auto pk_node = input_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(pk_node);
const std::string &param_name = pk_node->fullname_with_scope();
void *param_data = tensor->data_c();
size_t param_size = LongToSize(tensor->data().nbytes());
size_t param_key = GetParamKey(param_name);
if (param_key == kInvalidKey) {
MS_LOG(DEBUG) << "Parameter " << param_name << " has no key assigned.";
return;
}
bool init_in_server = false;
auto param_info_ptr = pk_node->param_info();
if (param_info_ptr != nullptr && param_info_ptr->init_in_server()) {
init_in_server = true;
}
SetParamInitInServer(param_name, init_in_server);
bool init = IsKeyInit(param_key);
if (!init) {
MS_LOG(DEBUG) << "Init parameter key " << param_key << " and optimizer in parameter server side for " << param_name
<< ", whether init in server: " << init_in_server;
AddKeyToServerId(param_key);
if (!PsDataPrefetch::GetInstance().cache_enable()) {
if (!init_in_server) {
if (param_size > INT_MAX) {
MS_LOG(EXCEPTION) << "PS mode max weight size is " << INT_MAX << ", " << param_name << " size is "
<< param_size;
}
InitPSParamData({param_key}, param_data, param_size);
}
InitPSOptimId(param_key);
InitPSOptimInputShapes(param_key);
}
}
}
bool Worker::DoPSEmbeddingLookup(const Key &key, const std::vector<int> &lookup_ids, std::vector<float> *lookup_result,
int64_t cmd) {
MS_EXCEPTION_IF_NULL(lookup_result);
EmbeddingTableLookup embedding_table_lookup;
embedding_table_lookup.set_key(key);
*embedding_table_lookup.mutable_keys() = {lookup_ids.begin(), lookup_ids.end()};
PartitionEmbeddingMessages messages;
lookup_partitioner_(embedding_table_lookup, &messages, {});
std::vector<uint32_t> rank_ids;
std::vector<std::string> data_strs;
for (size_t i = 0; i < messages.size(); i++) {
if (messages.at(i).first) {
rank_ids.push_back(i);
data_strs.emplace_back(messages.at(i).second.SerializeAsString());
}
}
std::vector<VectorPtr> resp;
while (!worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, LongToInt(cmd), &resp)) {
MS_LOG(INFO) << "Worker send failed!, retrying.";
if (!running_) {
MS_LOG(ERROR) << "Worker send failed!";
return false;
}
std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
}
if (lookup_ids.empty()) {
MS_LOG(ERROR) << "The lookup ids are empty.";
return false;
}
int64_t single_id_len = SizeToLong(lookup_result->size() / lookup_ids.size());
mindspore::HashMap<Key, std::shared_ptr<std::pair<float *, int64_t>>> id_addr_map;
std::shared_ptr<std::vector<float>> values = std::make_shared<std::vector<float>>();
std::shared_ptr<std::vector<Key>> keys = std::make_shared<std::vector<Key>>();
int64_t value_offset = 0;
for (size_t i = 0; i < resp.size(); ++i) {
KVMessage message;
CHECK_RETURN_TYPE(message.ParseFromArray(resp.at(i)->data(), SizeToInt(resp.at(i)->size())));
for (auto j = 0; j < message.values_size(); j++) {
values->push_back(message.values(j));
}
for (auto k = 0; k < message.keys_size(); k++) {
const Key &message_key = message.keys(k);
keys->push_back(message_key);
}
}
for (size_t i = 0; i < keys->size(); i++) {
const Key &map_key = keys->at(i);
float *addr = values->data() + value_offset;
value_offset += single_id_len;
id_addr_map[map_key] = std::make_shared<std::pair<float *, int64_t>>(std::make_pair(addr, single_id_len));
}
float *result_addr = lookup_result->data();
MS_EXCEPTION_IF_NULL(result_addr);
int64_t offset = 0;
size_t dst_size = 0;
size_t src_size = 0;
void *dst_data = nullptr;
void *src_data = nullptr;
for (size_t i = 0; i < lookup_ids.size(); i++) {
if (id_addr_map.count(lookup_ids[i]) == 0) {
offset += single_id_len;
continue;
}
const Key &id_key = static_cast<Key>(lookup_ids[i]);
auto &pair = id_addr_map[id_key];
size_t size = LongToSize(single_id_len * sizeof(float));
dst_size = size;
src_size = size;
dst_data = result_addr + offset;
src_data = pair->first;
MS_ERROR_IF_NULL(dst_data);
MS_ERROR_IF_NULL(src_data);
errno_t mem_ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (mem_ret != EOK) {
MS_LOG(ERROR) << "memcpy_s error, errorno(" << mem_ret << ")";
return false;
}
offset += single_id_len;
}
return true;
}
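// DoPSEmbeddingLookup stitches per-server responses back into the caller's buffer:
// servers answer with (id, slice) pairs in arbitrary order, and each requested id is
// copied back to its original request position. A standalone sketch of that step;
// StitchLookup is an illustrative name, and ids missing from the responses keep their
// zero-initialized slice, matching the offset-skip above:
#include <cstring>
#include <unordered_map>
#include <vector>
void StitchLookup(const std::vector<int> &lookup_ids, size_t slice_len,
                  const std::unordered_map<int, const float *> &id_to_slice, std::vector<float> *result) {
  result->assign(lookup_ids.size() * slice_len, 0.0f);
  for (size_t i = 0; i < lookup_ids.size(); ++i) {
    auto it = id_to_slice.find(lookup_ids[i]);
    if (it == id_to_slice.end()) {
      continue;  // id not returned by any server: leave zeros
    }
    std::memcpy(result->data() + i * slice_len, it->second, slice_len * sizeof(float));
  }
}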
bool Worker::UpdateEmbeddingTable(const std::vector<Key> &keys, const std::vector<int> &lookup_ids,
const std::vector<float> &vals) {
KVMessage kvs;
*kvs.mutable_keys() = {keys.begin(), keys.end()};
*kvs.mutable_len() = {lookup_ids.begin(), lookup_ids.end()};
*kvs.mutable_values() = {vals.begin(), vals.end()};
PartitionKVMessages messages;
update_embedding_partitioner_(kvs, &messages, {});
std::vector<uint32_t> rank_ids;
std::vector<std::string> data_strs;
for (size_t i = 0; i < messages.size(); i++) {
if (messages.at(i).first) {
rank_ids.push_back(i);
data_strs.emplace_back(messages.at(i).second.SerializeAsString());
}
}
while (!worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, LongToInt(kUpdateEmbeddingsCmd))) {
MS_LOG(INFO) << "Worker send failed!, retrying.";
if (!running_) {
MS_LOG(ERROR) << "Worker send failed!";
return false;
}
std::this_thread::sleep_for(std::chrono::milliseconds(kRetryDuration));
}
return true;
}
void Worker::Finalize() {
if (running_) {
MS_LOG(INFO) << "Worker starts finalizing...";
KVMessage kvs;
kvs.add_keys(0);
kvs.add_values(0.0f);
const std::string &kv_data = kvs.SerializeAsString();
worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, kFinalizeCmd);
worker_node_.Finish();
worker_node_.Stop();
running_ = false;
MS_LOG(INFO) << "Worker finalized successfully.";
}
}
void Worker::Initialize() {
lookup_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
LookupIdPartitioner(send, partition, attrs);
};
worker_init_embedding_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
WorkerInitEmbeddingPartitioner(send, partition, attrs);
};
round_robin_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
RoundRobinPartitioner(send, partition, attrs);
};
sparse_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
SparsePartitioner(send, partition, attrs);
};
update_embedding_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
UpdateEmbeddingPartitioner(send, partition, attrs);
};
broadcast_partitioner_ = [this](auto &&send, auto &&partition, auto &&attrs) {
BroadcastPartitioner(send, partition, attrs);
};
}
bool Worker::IsKeyInit(const size_t key) {
if (init_keys_.find(key) == init_keys_.end() || !init_keys_[key]) {
return false;
}
return true;
}
void Worker::AddKeyToServerId(const Key &key) { AddKeyByHashMod(key); }
void Worker::AddKeyByHashMod(const Key &key) {
if (server_num_ == 0) {
MS_LOG(EXCEPTION) << "Server number is invalid:0";
}
key_to_server_id_[key] = static_cast<int64_t>(key % server_num_);
MS_LOG(DEBUG) << "The server id of key " << key << " is " << key_to_server_id_[key];
}
void Worker::InitPSOptimId(const size_t param_key) {
MS_LOG(INFO) << "InitPSOptimId key is:" << param_key;
if (key_to_optimId_.count(param_key) == 0) {
MS_LOG(EXCEPTION) << "Can't find optimizer id of parameter key " << param_key;
}
int64_t optim_id = key_to_optimId_[param_key];
std::vector<Key> keys = {param_key};
std::vector<float> optim_id_vals = {static_cast<float>(optim_id)};
std::vector<int> optim_id_lens = {SizeToInt(optim_id_vals.size())};
MS_LOG(INFO) << "The keys is" << keys << " the optim_id_vals is: " << optim_id_vals
<< " optim_id_lens is:" << optim_id_lens;
PushData(keys, optim_id_vals, optim_id_lens, kInitWeightToOptimIdCmd);
}
void Worker::InitPSOptimInputShapes(const size_t key) {
std::vector<Key> keys;
std::vector<int> shape_len;
std::vector<float> all_shape;
std::vector<ShapeVector> shapes = key_to_optim_shapes_[key];
for (auto shape : shapes) {
keys.push_back(key);
if (shape.size() == 0) {
shape_len.push_back(1);
all_shape.push_back(1);
} else {
shape_len.push_back(SizeToInt(shape.size()));
std::transform(shape.begin(), shape.end(), std::back_inserter(all_shape),
[](int64_t dim) -> float { return static_cast<float>(dim); });
}
}
MS_LOG(INFO) << "keys:" << keys;
MS_LOG(INFO) << "shape_len:" << shape_len;
MS_LOG(INFO) << "all_shape:" << all_shape;
if (!init_keys_[key]) {
init_keys_[key] = true;
}
PushData(keys, all_shape, shape_len, kInitOptimInputsShapeCmd);
}
void Worker::InitPSParamData(const std::vector<size_t> &keys, void *const origin_addr, size_t size) {
MS_EXCEPTION_IF_NULL(origin_addr);
std::vector<float> addr{reinterpret_cast<float *>(origin_addr),
reinterpret_cast<float *>(origin_addr) + size / sizeof(float)};
std::vector<Key> key(keys);
std::vector<int> lens;
lens.push_back(SizeToInt(addr.size()));
MS_LOG(INFO) << "the keys are:" << keys;
MS_LOG(INFO) << "the values are:" << addr;
PushData(key, addr, lens, kInitWeightsCmd);
init_keys_[key[0]] = true;
}
bool Worker::IsReadyForPush(const Key &key) {
std::vector<float> result(1, 0);
PullData({key}, &result, nullptr, kCheckReadyForPushCmd);
MS_LOG(INFO) << "key:" << key;
if (result[0] > 0) {
MS_LOG(INFO) << "IsReadyForPush: true";
return true;
} else {
MS_LOG(INFO) << "IsReadyForPush: false";
return false;
}
}
bool Worker::IsReadyForPull(const Key &key) {
std::vector<float> result(1, 0);
PullData({key}, &result, nullptr, kCheckReadyForPullCmd);
if (result[0] > 0) {
MS_LOG(INFO) << "IsReadyForPull: true";
return true;
} else {
MS_LOG(INFO) << "IsReadyForPull: false";
return false;
}
}
void Worker::PrepareSparseGradient(const size_t, const size_t, const mindspore::HashSet<int> &distinct_ids,
const std::vector<std::pair<int, float *>> &indice_to_grads, const int *all_indice,
const size_t segment_size, float *gradient, int *indices) {
MS_EXCEPTION_IF_NULL(all_indice);
MS_EXCEPTION_IF_NULL(gradient);
MS_EXCEPTION_IF_NULL(indices);
size_t offset = 0;
int64_t index = 0;
size_t segment_data_size = segment_size * sizeof(float);
size_t dst_size;
size_t src_size;
void *dst_data = nullptr;
void *src_data = nullptr;
for (auto &pair : indice_to_grads) {
if (distinct_ids.count(pair.first) == 0) {
continue;
}
indices[index++] = pair.first;
dst_size = segment_data_size;
src_size = segment_data_size;
dst_data = gradient + offset;
src_data = pair.second;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy_s error, errno(" << ret << ")";
return;
}
offset += segment_size;
}
}
void Worker::BuildSparseValue(const std::vector<int> &lengths, const size_t grad_index, const size_t indice_index,
const float *original_data, const float *grads, int *indices,
std::vector<float> *reduced_data) {
MS_EXCEPTION_IF_NULL(original_data);
MS_EXCEPTION_IF_NULL(grads);
MS_EXCEPTION_IF_NULL(indices);
MS_EXCEPTION_IF_NULL(reduced_data);
int64_t offset = 0;
size_t dst_size = 0;
size_t src_size = 0;
void *dst_data = nullptr;
void *src_data = nullptr;
for (size_t i = 0; i < lengths.size(); i++) {
if (i != grad_index && i != indice_index) {
size_t data_size = lengths[i] * sizeof(float);
dst_size = data_size;
src_size = data_size;
dst_data = reduced_data->data() + offset;
src_data = const_cast<float *>(original_data) + offset;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
errno_t mem_ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (mem_ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << mem_ret << ")";
return;
}
}
offset += lengths[i];
}
// Fill the reduced gradient
int64_t grad_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += lengths[i];
}
size_t data_size = lengths[grad_index] * sizeof(float);
dst_size = data_size;
src_size = data_size;
dst_data = reduced_data->data() + grad_offset;
src_data = const_cast<float *>(grads);
MS_EXCEPTION_IF_NULL(dst_data);
errno_t ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
// Fill the reduced indice
int64_t indice_offset = grad_offset + lengths[grad_index];
data_size = lengths[indice_index] * sizeof(float);
float *indice_data = reduced_data->data() + indice_offset;
dst_size = data_size;
src_size = data_size;
dst_data = indice_data;
src_data = indices;
MS_EXCEPTION_IF_NULL(dst_data);
MS_EXCEPTION_IF_NULL(src_data);
ret = memcpy_s(dst_data, dst_size, src_data, src_size);
if (ret != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno(" << ret << ")";
return;
}
}
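// BuildSparseValue rebuilds a single float payload laid out as
// [other segments..., reduced gradient, reduced indices], where the int32 indices are
// bit-copied into float slots (a memcpy, not a numeric cast) so the whole message stays
// one float array. A standalone sketch of that last step, assuming 32-bit int and float;
// AppendIndicesAsFloats is an illustrative name:
#include <cstring>
#include <vector>
void AppendIndicesAsFloats(const std::vector<int> &indices, std::vector<float> *payload) {
  size_t old_size = payload->size();
  payload->resize(old_size + indices.size());
  // Preserve each int32 bit pattern inside a float slot; the receiver
  // reinterprets these slots back to int.
  std::memcpy(payload->data() + old_size, indices.data(), indices.size() * sizeof(int));
}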
void Worker::PushData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
int cmd, int64_t) {
KVMessage kvs;
*kvs.mutable_keys() = {keys.begin(), keys.end()};
*kvs.mutable_values() = {vals.begin(), vals.end()};
*kvs.mutable_len() = {lens.begin(), lens.end()};
MS_LOG(INFO) << "the result is:" << embedding_table_ranges_.count(keys[0]);
if (embedding_table_ranges_.count(keys[0])) {
if (cmd == kInitWeightsCmd) {
SendForPush(cmd, kvs, worker_init_embedding_partitioner_, {});
} else {
const std::string &kv_data = kvs.SerializeAsString();
worker_node_.Broadcast(core::NodeRole::SERVER, kv_data, cmd);
}
} else {
SendForPush(cmd, kvs, round_robin_partitioner_, {});
}
}
void Worker::PushSparseData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size) {
KVMessage kvs;
*kvs.mutable_keys() = {keys.begin(), keys.end()};
*kvs.mutable_values() = {vals.begin(), vals.end()};
*kvs.mutable_len() = {lens.begin(), lens.end()};
if (embedding_table_ranges_.count(keys[0])) {
std::map<int64_t, int64_t> attrs{{0, grad_index}, {1, indice_index}, {2, first_dim_size}, {3, outer_dim_size}};
SendForPush(kPushCmd, kvs, sparse_partitioner_, attrs);
} else {
SendForPush(kPushCmd, kvs, round_robin_partitioner_, {});
}
}
void Worker::PullData(const std::vector<Key> &keys, std::vector<float> *const vals, std::vector<int> *lens, int cmd,
int64_t priority) {
MS_EXCEPTION_IF_NULL(vals);
KVMessage kvs;
*kvs.mutable_keys() = {keys.begin(), keys.end()};
if (embedding_table_ranges_.count(keys[0])) {
SendForPull(cmd, kvs, broadcast_partitioner_, {}, vals, lens);
} else {
SendForPull(cmd, kvs, round_robin_partitioner_, {}, vals, lens);
}
}
void Worker::LookupIdPartitioner(const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition,
const std::map<int64_t, int64_t> &) {
MS_EXCEPTION_IF_NULL(partition);
const Key &key = send.key();
const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
partition->resize(ranges.size());
for (size_t i = 0; i < ranges.size(); i++) {
const EmbeddingTableShardMetadata &range = ranges[i];
const auto &begin = range.begin();
const auto &end = range.end();
mindspore::HashSet<int32_t> unique_ids;
auto &kvs = partition->at(i).second;
kvs.set_key(key);
std::for_each(send.keys().begin(), send.keys().end(), [&](int32_t lookup_id) {
if (lookup_id >= SizeToInt(begin) && lookup_id <= SizeToInt(end)) {
unique_ids.insert(lookup_id);
}
});
MS_LOG(DEBUG) << "The unique ids size is:" << unique_ids.size();
for (const auto &lookup_id : unique_ids) {
kvs.add_keys(lookup_id);
kvs.add_values(0.0f);
}
if (kvs.keys().empty()) {
partition->at(i).first = false;
} else {
partition->at(i).first = true;
}
}
}
void Worker::SparsePartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs) {
MS_EXCEPTION_IF_NULL(partition);
// Init variables
float *data = const_cast<float *>(send.values().data());
if (attrs.count(kGradIndex) == 0 || attrs.count(kIndiceIndex) == 0 || attrs.count(kFirstDimSize) == 0 ||
attrs.count(kOutDimSize) == 0) {
MS_LOG(EXCEPTION) << "Invalid attrs keys";
}
auto iter = attrs.find(kGradIndex);
size_t grad_index = static_cast<size_t>(iter->second);
iter = attrs.find(kIndiceIndex);
size_t indice_index = static_cast<size_t>(iter->second);
iter = attrs.find(kFirstDimSize);
size_t first_dim_size = static_cast<size_t>(iter->second);
iter = attrs.find(kOutDimSize);
size_t outer_dim_size = static_cast<size_t>(iter->second);
size_t grad_size = send.len()[SizeToInt(grad_index)];
size_t indice_size = send.len()[SizeToInt(indice_index)];
size_t segment_size = grad_size / indice_size;
size_t grad_offset = 0;
size_t indice_offset = 0;
for (size_t i = 0; i < grad_index; i++) {
grad_offset += send.len()[i];
}
for (size_t j = 0; j < indice_index; j++) {
indice_offset += send.len()[j];
}
float *grad_data = data + grad_offset;
void *indice_data_temp = data + indice_offset;
int *indice_data = reinterpret_cast<int *>(indice_data_temp);
// Build the mappings of indice to gradient
std::vector<std::pair<int, float *>> indice_to_grads;
for (size_t i = 0; i < indice_size; i++) {
int indice = indice_data[i];
float *grad = grad_data + i * segment_size;
indice_to_grads.push_back(std::make_pair(indice, grad));
}
const Key &key = send.keys()[0];
const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
partition->resize(ranges.size());
// Construct reduced sparse data for each server
for (size_t i = 0; i < ranges.size(); i++) {
const EmbeddingTableShardMetadata &range = ranges[i];
const auto &begin = range.begin();
const auto &end = range.end();
auto &kvs = partition->at(i).second;
*kvs.mutable_keys() = {send.keys().begin(), send.keys().end()};
*kvs.mutable_len() = {send.len().begin(), send.len().end()};
// Prepare the sparse gradient and indice
std::vector<int> indice_ids;
mindspore::HashSet<int> distinct_ids;
for (size_t j = 0; j < indice_size; j++) {
size_t indice = static_cast<size_t>(indice_data[j]);
if (indice >= begin && indice <= end) {
indice_ids.push_back(indice);
distinct_ids.insert(indice);
}
}
size_t indices_size = indice_ids.size();
if (indices_size > 0) {
size_t partition_segment_size = indices_size * segment_size;
std::vector<float> src_grad_data(partition_segment_size);
std::vector<int> src_indice_data(indices_size);
PrepareSparseGradient(begin, end, distinct_ids, indice_to_grads, indice_data, segment_size, src_grad_data.data(),
src_indice_data.data());
// Reduce the sparse gradient and indice
std::vector<float> new_grad(partition_segment_size);
std::vector<int> new_indices(indices_size);
mindspore::kernel::SparseGradient<int> unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size});
Util::ReduceSparseGradient(src_grad_data.data(), src_indice_data.data(), indices_size, segment_size,
first_dim_size, outer_dim_size, &unique_sparse_grad);
// Update the length of reduce sparse gradient and indice
std::vector<int> reduced_lens = {kvs.len().begin(), kvs.len().end()};
reduced_lens[grad_index] = unique_sparse_grad.indices_size_ * segment_size;
reduced_lens[indice_index] = unique_sparse_grad.indices_size_;
// Build the sparse value to be sent
size_t total_size = std::accumulate(reduced_lens.begin(), reduced_lens.end(), 0, std::plus<int>());
std::vector<float> reduced_data(total_size, 0);
BuildSparseValue(reduced_lens, grad_index, indice_index, data, unique_sparse_grad.value_,
unique_sparse_grad.indices_, &reduced_data);
*kvs.mutable_len() = {reduced_lens.begin(), reduced_lens.end()};
*kvs.mutable_values() = {reduced_data.begin(), reduced_data.end()};
}
if (indices_size == 0) {
// No lookup id falls into this shard: send one sentinel value and an empty len.
std::vector<float> no_vals;
std::vector<int> no_lens;
no_vals.push_back(kGradValue);
*kvs.mutable_values() = {no_vals.begin(), no_vals.end()};
*kvs.mutable_len() = {no_lens.begin(), no_lens.end()};
}
partition->at(i).first = true;
}
}
void Worker::RoundRobinPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &) {
MS_EXCEPTION_IF_NULL(partition);
partition->resize(LongToSize(server_num_));
auto keys = send.keys();
auto values = send.values();
auto lens = send.len();
MS_LOG(INFO) << "the key size is:" << send.keys_size() << " the values size is:" << send.values_size()
<< " the lens:" << send.len_size();
size_t len;
Key param_key;
for (int i = 0; i < send.keys_size(); i++) {
param_key = keys[i];
int64_t server_id = key_to_server_id_[param_key];
if (!partition->at(LongToUlong(server_id)).first) {
partition->at(LongToUlong(server_id)).first = true;
}
KVMessage &server_kv_pairs = partition->at(LongToUlong(server_id)).second;
server_kv_pairs.add_keys(param_key);
if (values.empty()) {
continue;
}
len = lens[i];
int64_t offset = std::accumulate(lens.begin(), lens.begin() + i, 0);
auto val_begin = values.begin() + offset;
auto val_end = val_begin + len;
for (auto it = val_begin; it != val_end; ++it) {
server_kv_pairs.add_values(*it);
}
server_kv_pairs.add_len(len);
}
}
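// RoundRobinPartitioner slices the flat value array by the prefix sum of lens and
// appends each key's segment to its server's bucket. A standalone sketch using the
// hash-mod mapping from AddKeyByHashMod; Bucket and PartitionByKey are illustrative
// names, not part of worker.cc:
#include <cstdint>
#include <vector>
struct Bucket {
  std::vector<uint64_t> keys;
  std::vector<float> values;
  std::vector<int> lens;
};
std::vector<Bucket> PartitionByKey(const std::vector<uint64_t> &keys, const std::vector<float> &values,
                                   const std::vector<int> &lens, size_t server_num) {
  std::vector<Bucket> buckets(server_num);
  size_t offset = 0;
  for (size_t i = 0; i < keys.size(); ++i) {
    Bucket &bucket = buckets[keys[i] % server_num];  // same mapping as AddKeyByHashMod
    bucket.keys.push_back(keys[i]);
    bucket.values.insert(bucket.values.end(), values.begin() + offset,
                         values.begin() + offset + lens[i]);
    bucket.lens.push_back(lens[i]);
    offset += static_cast<size_t>(lens[i]);
  }
  return buckets;
}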
void Worker::WorkerInitEmbeddingPartitioner(const KVMessage &send, std::vector<std::pair<bool, KVMessage>> *partition,
const std::map<int64_t, int64_t> &) {
MS_EXCEPTION_IF_NULL(partition);
partition->resize(LongToSize(server_num_));
auto keys = send.keys();
auto values = send.values();
auto lens = send.len();
int32_t col_cnt = lens[0] / embedding_row_cnt_[keys[0]];
const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[keys[0]]);
for (size_t i = 0; i < ranges.size(); i++) {
size_t offset_begin = ranges[i].begin() * col_cnt;
size_t offset_end = (ranges[i].end() + 1) * col_cnt;
KVMessage kvs;
*kvs.mutable_keys() = keys;
*kvs.mutable_values() = {values.begin() + offset_begin, values.begin() + offset_end};
kvs.add_len(offset_end - offset_begin);
partition->at(i).first = true;
partition->at(i).second = kvs;
}
}
void Worker::UpdateEmbeddingPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &) {
MS_EXCEPTION_IF_NULL(partition);
const float *embedding_vals = send.values().data();
const uint64_t *lookup_ids = send.len().data();
size_t val_size = IntToSize(send.values_size());
size_t id_size = IntToSize(send.len_size());
if (id_size == 0) {
MS_LOG(EXCEPTION) << "The id size is 0.";
return;
}
size_t embedding_dim = val_size / id_size;
const Key &key = send.keys()[0];
const std::vector<EmbeddingTableShardMetadata> &ranges = *(embedding_table_ranges_[key]);
partition->resize(ranges.size());
for (size_t i = 0; i < ranges.size(); i++) {
const EmbeddingTableShardMetadata &range = ranges[i];
const auto &begin = range.begin();
const auto &end = range.end();
auto &kvs = partition->at(i).second;
kvs.add_keys(key);
for (size_t j = 0; j < id_size; j++) {
auto lookup_id = lookup_ids[j];
if (lookup_id >= begin && lookup_id <= end) {
kvs.add_keys(lookup_id);
for (size_t k = 0; k < embedding_dim; k++) {
kvs.add_values(embedding_vals[j * embedding_dim + k]);
}
}
}
if (kvs.keys_size() <= 1) {
partition->at(i).first = false;
} else {
partition->at(i).first = true;
}
}
}
void Worker::BroadcastPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &) {
MS_EXCEPTION_IF_NULL(partition);
partition->resize(LongToSize(server_num_));
for (size_t i = 0; i < LongToSize(server_num_); i++) {
partition->at(i).first = true;
partition->at(i).second = send;
}
}
void Worker::SendForPush(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &attrs) {
PartitionKVMessages messages;
partitioner(send, &messages, attrs);
std::vector<uint32_t> rank_ids;
std::vector<std::string> data_strs;
for (size_t i = 0; i < messages.size(); i++) {
if (messages.at(i).first) {
rank_ids.push_back(i);
data_strs.emplace_back(messages.at(i).second.SerializeAsString());
}
}
worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, cmd);
}
void Worker::SendForPull(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &, std::vector<float> *vals, std::vector<int> *lens) {
MS_EXCEPTION_IF_NULL(vals);
PartitionKVMessages messages;
partitioner(send, &messages, {});
std::vector<uint32_t> rank_ids;
std::vector<std::string> data_strs;
for (size_t i = 0; i < messages.size(); i++) {
if (messages.at(i).first) {
rank_ids.push_back(i);
data_strs.emplace_back(messages.at(i).second.SerializeAsString());
}
}
std::vector<VectorPtr> resp;
worker_node_.Send(core::NodeRole::SERVER, rank_ids, data_strs, cmd, &resp);
vals->clear();
for (size_t i = 0; i < resp.size(); ++i) {
KVMessage message;
CHECK_RETURN_TYPE(message.ParseFromArray(resp.at(i)->data(), SizeToInt(resp.at(i)->size())));
std::copy(message.values().begin(), message.values().end(), std::back_inserter(*vals));
if (lens) {
lens->clear();
std::copy(message.len().begin(), message.len().end(), std::back_inserter(*lens));
}
}
}
} // namespace ps
} // namespace mindspore

View File

@ -1,151 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PS_WORKER_H_
#define MINDSPORE_CCSRC_PS_WORKER_H_
#include <utility>
#include <memory>
#include <vector>
#include <string>
#include <numeric>
#include <functional>
#include <algorithm>
#include <map>
#include <mutex>
#include "utils/hash_map.h"
#include "utils/hash_set.h"
#include "utils/log_adapter.h"
#include "ir/tensor.h"
#include "ps/util.h"
#include "ps/constants.h"
#include "utils/shape_utils.h"
#include "ps/ps_cache/ps_data/ps_data_prefetch.h"
#include "ps/core/ps_worker_node.h"
#include "ps/embedding_table_shard_metadata.h"
#include "proto/comm.pb.h"
#include "proto/ps.pb.h"
#include "ps/ps_context.h"
#include "include/backend/visible.h"
namespace mindspore {
namespace ps {
class BACKEND_EXPORT Worker {
public:
static Worker &GetInstance();
using Callback = std::function<void()>;
using PartitionEmbeddingMessages = std::vector<std::pair<bool, EmbeddingTableLookup>>;
using PartitionKVMessages = std::vector<std::pair<bool, KVMessage>>;
using EmbeddingPartitioner = std::function<void(
const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition, const std::map<int64_t, int64_t> &attrs)>;
using KVPartitioner =
std::function<void(const KVMessage &send, PartitionKVMessages *partition, const std::map<int64_t, int64_t> &attrs)>;
void Run();
void Push(const std::vector<size_t> &keys, std::vector<uintptr_t> addrs, const ShapeVector &sizes);
void Pull(const size_t key, void *dev_addr, const size_t size);
size_t SetParamKey(const std::string &param_name);
size_t GetParamKey(const std::string &param_name);
void SetParamInitInServer(const std::string &param_name, bool init_in_server);
bool GetParamInitInServer(const std::string &param_name);
void SetKeyOptimId(size_t key, const std::string &optimizer_name);
void SetOptimInputShapes(size_t key, const ShapeVector &shape);
void AddEmbeddingTable(const Key &key, const size_t &row_count);
bool InitPSEmbeddingTable(const size_t &key, const std::vector<size_t> &input_shape,
const std::vector<size_t> &indices_shape, const std::vector<size_t> &output_shape,
const ParamInitInfoMessage &info, uint32_t timeout = core::kCommTimeoutInSeconds);
void InitPSParamAndOptim(const AnfNodePtr &input_node, const tensor::TensorPtr &tensor);
bool DoPSEmbeddingLookup(const Key &key, const std::vector<int> &lookup_ids, std::vector<float> *lookup_result,
int64_t cmd);
bool UpdateEmbeddingTable(const std::vector<Key> &keys, const std::vector<int> &lookup_ids,
const std::vector<float> &vals);
bool running() const { return running_; }
void Finalize();
private:
Worker() : server_num_(-1), running_(false), key_cnt_(0) {}
~Worker() = default;
Worker(const Worker &) = delete;
Worker &operator=(const Worker &) = delete;
void Initialize();
bool IsKeyInit(const size_t key);
void AddKeyToServerId(const Key &key);
void AddKeyByHashMod(const Key &key);
void InitPSOptimId(const size_t param_key);
void InitPSOptimInputShapes(const size_t key);
void InitPSParamData(const std::vector<size_t> &keys, void *const origin_addr, size_t size);
bool IsReadyForPush(const Key &key);
bool IsReadyForPull(const Key &key);
void PrepareSparseGradient(const size_t begin, const size_t end, const mindspore::HashSet<int> &distinct_ids,
const std::vector<std::pair<int, float *>> &indice_to_grads, const int *all_indice,
const size_t segment_size, float *gradient, int *indices);
void BuildSparseValue(const std::vector<int> &lengths, const size_t grad_index, const size_t indice_index,
const float *original_data, const float *grads, int *indices, std::vector<float> *reduced_data);
void PushData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens = {},
int command = 0, int64_t priority = 0);
void PushSparseData(const std::vector<Key> &keys, const std::vector<float> &vals, const std::vector<int> &lens,
size_t grad_index, size_t indice_index, size_t first_dim_size, size_t outer_dim_size);
void PullData(const std::vector<Key> &keys, std::vector<float> *const vals, std::vector<int> *lens = nullptr,
int cmd = 0, int64_t priority = 0);
void LookupIdPartitioner(const EmbeddingTableLookup &send, PartitionEmbeddingMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void SparsePartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void RoundRobinPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void WorkerInitEmbeddingPartitioner(const KVMessage &send, std::vector<std::pair<bool, KVMessage>> *partition,
const std::map<int64_t, int64_t> &attrs);
void UpdateEmbeddingPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void BroadcastPartitioner(const KVMessage &send, PartitionKVMessages *partition,
const std::map<int64_t, int64_t> &attrs);
void SendForPush(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &attrs);
void SendForPull(int cmd, const KVMessage &send, const KVPartitioner &partitioner,
const std::map<int64_t, int64_t> &attrs, std::vector<float> *vals, std::vector<int> *lens);
int64_t server_num_;
bool running_;
std::mutex running_mutex_;
size_t key_cnt_;
std::map<std::string, size_t> param_to_key_;
std::map<size_t, bool> init_keys_;
std::map<size_t, int64_t> key_to_optimId_;
std::map<size_t, std::vector<ShapeVector>> key_to_optim_shapes_;
std::map<std::string, bool> param_to_init_in_server_;
core::PSWorkerNode worker_node_;
EmbeddingPartitioner lookup_partitioner_;
KVPartitioner sparse_partitioner_;
KVPartitioner round_robin_partitioner_;
KVPartitioner worker_init_embedding_partitioner_;
KVPartitioner update_embedding_partitioner_;
KVPartitioner broadcast_partitioner_;
mindspore::HashMap<Key, int64_t> key_to_server_id_;
mindspore::HashMap<Key, size_t> embedding_row_cnt_;
mindspore::HashMap<Key, std::shared_ptr<std::vector<EmbeddingTableShardMetadata>>> embedding_table_ranges_;
};
} // namespace ps
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PS_WORKER_H_
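A rough caller-side sketch of how this API was driven before the purge, assuming a pre-purge build with the PS backend enabled; the parameter name, the optimizer name "Adam", and the buffer arguments are illustrative, and error handling is omitted:
#include "ps/worker.h"
void SketchTrainStep(const std::vector<size_t> &keys, std::vector<uintptr_t> grad_addrs,
                     const mindspore::ShapeVector &grad_sizes, void *weight_addr, size_t weight_bytes) {
  auto &worker = mindspore::ps::Worker::GetInstance();
  size_t key = worker.SetParamKey("fc1.weight");  // stable key per parameter (illustrative name)
  worker.SetKeyOptimId(key, "Adam");              // optimizer resolved by name (assumed name)
  worker.Push(keys, grad_addrs, grad_sizes);      // ship flattened gradients to the servers
  worker.Pull(key, weight_addr, weight_bytes);    // fetch the updated weights back
}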

View File

@ -38,9 +38,6 @@
#include "include/common/utils/utils.h"
#include "include/common/utils/parallel_context.h"
#include "include/common/debug/env_config_parser.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "kernel/common_utils.h"
using mindspore::kernel::Address;
@ -644,9 +641,6 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
}
add_need_alloc_nodes(input_node);
}
#ifdef WITH_BACKEND
bool ps_cache_check = false;
#endif
std::map<AnfNodePtr, AnfNodePtr> shadow_backend_node_map;
GetShadowBackendNodeMap(graph, &shadow_backend_node_map);
for (auto &item : need_alloc_nodes) {
@ -660,25 +654,6 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
continue;
}
DeviceAddressPtr device_address = GetInternalDeviceAddress(graph, item);
#ifdef WITH_BACKEND
const std::string &param_name = item->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name) && !ps::PSContext::instance()->enable_distributed_mindrt()) {
MS_LOG(INFO) << "Parameter(" << param_name << ")"
<< " enables the embeddingLookup cache in parameter server training mode.";
// PS embeddingLookup cache check.
if (!ps_cache_check) {
CheckIfSupportPSEmbeddingCache(graph);
ps_cache_check = true;
}
const auto &address = ps::ps_cache_instance.QueryHashTableAddr(param_name);
MS_EXCEPTION_IF_NULL(address.addr);
device_address = CreateDeviceAddress(address.addr, address.size, AnfAlgo::GetOutputFormat(item, index),
output_type_id, {item, index});
device_address->set_host_shape(trans::GetRuntimePaddingShape(item, index));
AnfAlgo::SetOutputAddr(device_address, index, item.get());
continue;
}
#endif
GetDeviceAddress(item, shadow_backend_node_map, index, graph.graph_id(), &device_address);
AnfAlgo::SetOutputAddr(device_address, index, item.get());
}
@ -1859,147 +1834,5 @@ bool KernelRuntime::LaunchKernels(const session::KernelGraph &graph) {
void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource";
}
#ifdef WITH_BACKEND
namespace {
// Finalize ps cache module before throw an exception.
void FinalizePsCache(const std::string &exception) {
ps::ps_cache_instance.Finalize();
MS_LOG(EXCEPTION) << exception;
}
} // namespace
void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph &graph,
AnfNodePtr *const first_cache_input_index,
size_t *const first_cache_size) const {
for (const auto &kernel : graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel);
if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
continue;
}
auto input_param = common::AnfAlgo::GetPrevNodeOutput(kernel, 0, true);
auto input_index = common::AnfAlgo::GetPrevNodeOutput(kernel, 1, true);
MS_EXCEPTION_IF_NULL(input_param.first);
MS_EXCEPTION_IF_NULL(input_index.first);
auto param_name = input_param.first->fullname_with_scope();
if (!ps::ps_cache_instance.IsHashTable(param_name)) {
continue;
}
auto size = ps::ps_cache_instance.QueryHashTableSize(param_name);
while (input_index.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(input_index.first) == kCastOpName)) {
input_index = common::AnfAlgo::GetPrevNodeOutput(input_index.first, 0, true);
MS_EXCEPTION_IF_NULL(input_index.first);
}
auto cnode = common::AnfAlgo::IsGraphKernel(input_index.first)
? common::AnfAlgo::GetOutputOfGraphkernel(input_index)
: input_index.first;
MS_EXCEPTION_IF_NULL(cnode);
if (!cnode->isa<CNode>()) {
FinalizePsCache("The embeddingLookup whose input index should be a CNode but got " +
cnode->fullname_with_scope());
}
auto input_index_node_name = common::AnfAlgo::GetCNodeName(cnode);
if (input_index_node_name != kGetNextOpName) {
bool full_batch = parallel::ParallelContext::GetInstance()->full_batch();
if ((!full_batch && (input_index_node_name != kUniqueOpName)) ||
(full_batch && (input_index_node_name != kMinimumOpName))) {
MS_LOG(ERROR) << "The input index of the embeddingLookup(" << kernel->fullname_with_scope()
<< ") cache is from " << cnode->fullname_with_scope();
FinalizePsCache(
"The embeddingLookup whose input index isn't from dataset doesn't support cache in parameter server training "
"mode.");
}
}
*first_cache_input_index = cnode;
*first_cache_size = size;
MS_LOG(INFO) << "The input index of the first embeddingLookup cache is from " << cnode->fullname_with_scope()
<< ", the cache size is " << size;
return;
}
}
void KernelRuntime::CheckSparsePSEmbeddingCache(const CNodePtr &node) const {
MS_EXCEPTION_IF_NULL(node);
auto pre_node = common::AnfAlgo::GetPrevNodeOutput(node, 1, true);
MS_EXCEPTION_IF_NULL(pre_node.first);
while (pre_node.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(pre_node.first) != kUniqueOpName)) {
pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
MS_EXCEPTION_IF_NULL(pre_node.first);
}
if (!(pre_node.first->isa<CNode>()) || (common::AnfAlgo::GetCNodeName(pre_node.first) != kUniqueOpName)) {
FinalizePsCache("The input_indices of kernel[SparseGatherV2] must be unique in parameter server cache mode");
}
pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
MS_EXCEPTION_IF_NULL(pre_node.first);
while (pre_node.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(pre_node.first) == kCastOpName)) {
pre_node = common::AnfAlgo::GetPrevNodeOutput(pre_node.first, 0, true);
MS_EXCEPTION_IF_NULL(pre_node.first);
}
if (!(pre_node.first->isa<CNode>()) || (common::AnfAlgo::GetCNodeName(pre_node.first) != kGetNextOpName)) {
FinalizePsCache(
"The input indices of kernel[Unique] must be produced from dataset directly and the indices value can not be "
"changed before delivering to kernel[Unique] in parameter server cache mode.");
}
}
void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph) {
AnfNodePtr first_cache_input_index = nullptr;
size_t first_cache_size = 0;
GetFirstPSEmbeddingCache(graph, &first_cache_input_index, &first_cache_size);
MS_EXCEPTION_IF_NULL(first_cache_input_index);
for (const auto &kernel : graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel);
if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
continue;
}
auto input_param = common::AnfAlgo::GetPrevNodeOutput(kernel, 0, true);
auto input_index = common::AnfAlgo::GetPrevNodeOutput(kernel, 1, true);
MS_EXCEPTION_IF_NULL(input_param.first);
MS_EXCEPTION_IF_NULL(input_index.first);
if (!input_param.first->isa<Parameter>()) {
continue;
}
auto param_name = input_param.first->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name) && (kernel_name == kSparseGatherV2OpName)) {
CheckSparsePSEmbeddingCache(kernel);
}
while (input_index.first->isa<CNode>() && (common::AnfAlgo::GetCNodeName(input_index.first) == kCastOpName)) {
input_index = common::AnfAlgo::GetPrevNodeOutput(input_index.first, 0, true);
MS_EXCEPTION_IF_NULL(input_index.first);
}
auto cnode = common::AnfAlgo::IsGraphKernel(input_index.first)
? common::AnfAlgo::GetOutputOfGraphkernel(input_index)
: input_index.first;
MS_EXCEPTION_IF_NULL(cnode);
if (cnode == first_cache_input_index) {
if (!ps::ps_cache_instance.IsHashTable(param_name)) {
MS_LOG(ERROR) << "The embeddingLookup(" << kernel->fullname_with_scope() << ") doesn't enable cache.";
FinalizePsCache(
"All the embeddingLookups whose input indices are from dataset must enable cache at the same time when one "
"of them enables cache in parameter server training mode.");
}
auto size = ps::ps_cache_instance.QueryHashTableSize(param_name);
if (size != first_cache_size) {
MS_LOG(ERROR) << "The cache size(" << size << ") of embeddingLookup(" << kernel->fullname_with_scope()
<< ") is not the same as other embeddingLookup cache size(" << first_cache_size << ").";
FinalizePsCache("The cache sizes of embeddingLookups are not the same in parameter server training mode.");
}
} else if (ps::ps_cache_instance.IsHashTable(param_name)) {
MS_LOG(ERROR) << "The input index of the embeddingLookup(" << kernel->fullname_with_scope() << ") cache is from "
<< cnode->fullname_with_scope();
FinalizePsCache(
"The embeddingLookup whose input index isn't from dataset doesn't support cache in parameter server training "
"mode.");
} else if (cnode->isa<CNode>() && (common::AnfAlgo::GetCNodeName(cnode) == kGetNextOpName)) {
MS_LOG(ERROR) << "The EmbeddingLookup kernel(" << kernel->fullname_with_scope() << ") doesn't enable cache.";
FinalizePsCache(
"All EmbeddingLookup kernels whose input indices are from dataset must enable cache at the same time.");
}
}
}
#endif
} // namespace device
} // namespace mindspore

View File

@ -16,18 +16,10 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/log_adapter.h"
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif
namespace mindspore {
namespace device {
void KernelRuntimeManager::ClearRuntimeResource() {
#ifdef WITH_BACKEND
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.SyncEmbeddingTable();
}
#endif
std::lock_guard<std::mutex> guard(lock_);
for (auto &iter : runtime_map_) {
MS_LOG(INFO) << "Release device " << iter.first;
@ -128,11 +120,6 @@ void KernelRuntimeManager::ReleaseKernelRuntime(const std::string &device_name,
if (runtime == nullptr) {
return;
}
#ifdef WITH_BACKEND
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.SyncEmbeddingTable();
}
#endif
runtime->ReleaseDeviceRes();
runtime_map_.erase(runtime_iter);
}

View File

@ -21,9 +21,6 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/comm_manager.h"
#include "include/common/utils/scoped_long_running.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "ps/ps_cache/ps_cache_manager.h"
#endif
namespace mindspore {
namespace session {

View File

@ -21,9 +21,6 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/comm_manager.h"
#include "include/common/utils/scoped_long_running.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "ps/ps_cache/ps_cache_manager.h"
#endif
namespace mindspore {
namespace session {

View File

@ -226,13 +226,6 @@ list(REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/util.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/scheduler.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/optimizer_info_builder.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/worker.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/parameter_server.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/fl/server/kernel/sgd_kernel.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/fl/server/kernel/apply_momentum_kernel.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST

View File

@ -1,38 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common_test.h"
#include "ps/embedding_table_shard_metadata.h"
namespace mindspore {
namespace ps {
class TestEmbeddingTableShardMetadata : public UT::Common {
public:
TestEmbeddingTableShardMetadata() = default;
virtual ~TestEmbeddingTableShardMetadata() = default;
void SetUp() override {}
void TearDown() override {}
};
TEST_F(TestEmbeddingTableShardMetadata, EmbeddingTable) {
EmbeddingTableShardMetadata embedding_table_shard(1, 100);
EXPECT_EQ(embedding_table_shard.begin(), 1);
EXPECT_EQ(embedding_table_shard.end(), 100);
EXPECT_EQ(embedding_table_shard.size(), 99);
}
} // namespace ps
} // namespace mindspore

View File

@ -1,59 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/util.h"
#include "ps/worker.h"
#include "ps/scheduler.h"
#include "ps/parameter_server.h"
namespace mindspore {
namespace ps {
PsCacheManager &PsCacheManager::GetInstance() {
static PsCacheManager instance{};
return instance;
}
void PsCacheManager::Finalize() {}
int PsCacheManager::cache_indices_lower_bound() const { return 1; }
bool Util::IsRoleOfPServer() { return true; }
bool Util::IsRoleOfScheduler() { return true; }
bool Util::FuseServerCommOps(const pipeline::ResourcePtr &res) { return true; }
Worker &Worker::GetInstance() {
static Worker instance{};
return instance;
}
void Worker::Run() {}
void Worker::Finalize() {}
ParameterServer &ParameterServer::GetInstance() {
static ParameterServer instance{};
return instance;
}
void ParameterServer::Run(const FuncGraphPtr &func_graph) {}
Scheduler &Scheduler::GetInstance() {
static Scheduler instance{};
return instance;
}
void Scheduler::Run() {}
} // namespace ps
} // namespace mindspore