forked from mindspore-Ecosystem/mindspore
!35520 backend compiled macro
Merge pull request !35520 from liubuyu/backend
This commit is contained in:
commit
758dff008d
|
@ -123,7 +123,7 @@ if(ENABLE_TESTCASES OR (NOT ENABLE_D))
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT (ENABLE_TESTCASES OR ENABLE_TEST) AND NOT (CMAKE_SYSTEM_NAME MATCHES "Windows" OR
|
if(NOT (ENABLE_TESTCASES OR ENABLE_TEST) AND NOT (CMAKE_SYSTEM_NAME MATCHES "Windows" OR
|
||||||
CMAKE_SYSTEM_NAME MATCHES "Darwin"))
|
CMAKE_SYSTEM_NAME MATCHES "Darwin") AND (ENABLE_D OR ENABLE_GPU OR ENABLE_CPU))
|
||||||
add_compile_definitions(WITH_BACKEND)
|
add_compile_definitions(WITH_BACKEND)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,7 @@
|
||||||
#include "include/common/debug/rdr/recorder_manager.h"
|
#include "include/common/debug/rdr/recorder_manager.h"
|
||||||
#include "debug/rdr/graph_recorder.h"
|
#include "debug/rdr/graph_recorder.h"
|
||||||
#endif
|
#endif
|
||||||
#if ENABLE_CPU && ENABLE_D
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/util.h"
|
#include "ps/util.h"
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
#include "ps/ps_cache/ps_cache_manager.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -247,7 +247,7 @@ bool TensorNeedSync(const std::shared_ptr<KernelGraph> &kernel_graph, const AnfN
|
||||||
}
|
}
|
||||||
MS_EXCEPTION_IF_NULL(memcpy_nums);
|
MS_EXCEPTION_IF_NULL(memcpy_nums);
|
||||||
(*memcpy_nums)++;
|
(*memcpy_nums)++;
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
const std::string ¶m_name = parameter->fullname_with_scope();
|
const std::string ¶m_name = parameter->fullname_with_scope();
|
||||||
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -346,7 +346,7 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_gra
|
||||||
}
|
}
|
||||||
if (AnfAlgo::OutputAddrExist(input_node, 0) &&
|
if (AnfAlgo::OutputAddrExist(input_node, 0) &&
|
||||||
TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
|
TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
const std::string ¶m_name = input_node->fullname_with_scope();
|
const std::string ¶m_name = input_node->fullname_with_scope();
|
||||||
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -459,7 +459,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
|
||||||
|
|
||||||
// adjust kernel
|
// adjust kernel
|
||||||
AdjustKernel(root_graph);
|
AdjustKernel(root_graph);
|
||||||
#if ENABLE_CPU && ENABLE_D
|
#ifdef WITH_BACKEND
|
||||||
InitPsWorker(root_graph);
|
InitPsWorker(root_graph);
|
||||||
#endif
|
#endif
|
||||||
// assign stream
|
// assign stream
|
||||||
|
@ -538,7 +538,7 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
|
||||||
single_graph->UpdateExecuteKernelStreamLabel();
|
single_graph->UpdateExecuteKernelStreamLabel();
|
||||||
// adjust execution order because merge child graph and other special operations
|
// adjust execution order because merge child graph and other special operations
|
||||||
AdjustKernel(graph);
|
AdjustKernel(graph);
|
||||||
#if ENABLE_CPU && ENABLE_D
|
#ifdef WITH_BACKEND
|
||||||
InitPsWorker(graph);
|
InitPsWorker(graph);
|
||||||
#endif
|
#endif
|
||||||
// Assign streams for control sink and hccl and so on
|
// Assign streams for control sink and hccl and so on
|
||||||
|
@ -616,7 +616,7 @@ void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_g
|
||||||
debugger_->PreExecute(kernel_graph);
|
debugger_->PreExecute(kernel_graph);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if ENABLE_CPU && ENABLE_D
|
#ifdef WITH_BACKEND
|
||||||
// Initialize parameter server
|
// Initialize parameter server
|
||||||
InitPSParamAndOptim(kernel_graph, inputs);
|
InitPSParamAndOptim(kernel_graph, inputs);
|
||||||
std::string channel_name;
|
std::string channel_name;
|
||||||
|
@ -1000,17 +1000,19 @@ void AscendSession::BuildOpsInGraph(const GraphId &graph_id, const std::map<AnfN
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
void DumpInit(uint32_t device_id) {
|
void DumpInit(const std::string &device_type, uint32_t device_id) {
|
||||||
auto &json_parser = DumpJsonParser::GetInstance();
|
auto &json_parser = DumpJsonParser::GetInstance();
|
||||||
json_parser.Parse();
|
json_parser.Parse();
|
||||||
json_parser.CopyDumpJsonToDir(device_id);
|
json_parser.CopyDumpJsonToDir(device_id);
|
||||||
json_parser.CopyHcclJsonToDir(device_id);
|
json_parser.CopyHcclJsonToDir(device_id);
|
||||||
json_parser.CopyMSCfgJsonToDir(device_id);
|
json_parser.CopyMSCfgJsonToDir(device_id);
|
||||||
if (json_parser.async_dump_enabled()) {
|
if (json_parser.async_dump_enabled()) {
|
||||||
#ifdef ENABLE_D
|
#if !(defined(ENABLE_TEST) || defined(ENABLE_TESTCASES))
|
||||||
// register callback to adx
|
if (device_type == kAscendDevice) {
|
||||||
if (json_parser.FileFormatIsNpy()) {
|
// register callback to adx
|
||||||
AdxRegDumpProcessCallBack(DumpDataCallBack);
|
if (json_parser.FileFormatIsNpy()) {
|
||||||
|
AdxRegDumpProcessCallBack(DumpDataCallBack);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (AdxDataDumpServerInit() != 0) {
|
if (AdxDataDumpServerInit() != 0) {
|
||||||
|
@ -1035,7 +1037,8 @@ void AscendSession::InitRuntimeResource() {
|
||||||
rank_id_ = GetRankId();
|
rank_id_ = GetRankId();
|
||||||
}
|
}
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
DumpInit(rank_id_);
|
auto device_type = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||||
|
DumpInit(device_type, rank_id_);
|
||||||
#endif
|
#endif
|
||||||
MS_LOG(INFO) << "Status record: end init runtime resource.";
|
MS_LOG(INFO) << "Status record: end init runtime resource.";
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,9 @@
|
||||||
#include "plugin/factory/ms_factory.h"
|
#include "plugin/factory/ms_factory.h"
|
||||||
#include "runtime/device/kernel_runtime.h"
|
#include "runtime/device/kernel_runtime.h"
|
||||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||||
|
#ifdef ENABLE_AKG
|
||||||
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
|
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
|
||||||
|
#endif
|
||||||
#include "plugin/device/cpu/hal/device/kernel_select_cpu.h"
|
#include "plugin/device/cpu/hal/device/kernel_select_cpu.h"
|
||||||
#include "backend/common/optimizer/optimizer.h"
|
#include "backend/common/optimizer/optimizer.h"
|
||||||
#include "backend/common/optimizer/pass_manager.h"
|
#include "backend/common/optimizer/pass_manager.h"
|
||||||
|
@ -42,7 +44,7 @@
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
#include "debug/data_dump/dump_json_parser.h"
|
#include "debug/data_dump/dump_json_parser.h"
|
||||||
#endif
|
#endif
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/util.h"
|
#include "ps/util.h"
|
||||||
#include "ps/ps_context.h"
|
#include "ps/ps_context.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -87,7 +89,7 @@ void CPUSession::Reorder(std::vector<CNodePtr> *node_list) {
|
||||||
void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
|
void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
|
||||||
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
||||||
auto pm = std::make_shared<opt::PassManager>();
|
auto pm = std::make_shared<opt::PassManager>();
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
auto ms_context = MsContext::GetInstance();
|
auto ms_context = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(ms_context);
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode && ps::PSContext::instance()->is_ps_mode()) {
|
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode && ps::PSContext::instance()->is_ps_mode()) {
|
||||||
|
@ -195,7 +197,7 @@ void CPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
|
||||||
MS_LOG(INFO) << "Bind input output address";
|
MS_LOG(INFO) << "Bind input output address";
|
||||||
runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);
|
runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
InitPSParamAndOptim(kernel_graph, inputs);
|
InitPSParamAndOptim(kernel_graph, inputs);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,9 +22,6 @@
|
||||||
#include "include/common/utils/comm_manager.h"
|
#include "include/common/utils/comm_manager.h"
|
||||||
#include "include/common/utils/scoped_long_running.h"
|
#include "include/common/utils/scoped_long_running.h"
|
||||||
#include "pybind_api/ir/tensor_py.h"
|
#include "pybind_api/ir/tensor_py.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using mindspore::tensor::TensorPy;
|
using mindspore::tensor::TensorPy;
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
|
|
@ -85,7 +85,7 @@
|
||||||
#include "common/graph_kernel/graph_kernel_flags.h"
|
#include "common/graph_kernel/graph_kernel_flags.h"
|
||||||
#include "include/common/utils/utils.h"
|
#include "include/common/utils/utils.h"
|
||||||
#include "abstract/utils.h"
|
#include "abstract/utils.h"
|
||||||
#if ENABLE_CPU && ENABLE_GPU
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/util.h"
|
#include "ps/util.h"
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
#include "ps/ps_cache/ps_cache_manager.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -358,7 +358,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
auto input_node = input_nodes[i];
|
auto input_node = input_nodes[i];
|
||||||
MS_EXCEPTION_IF_NULL(input_node);
|
MS_EXCEPTION_IF_NULL(input_node);
|
||||||
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
|
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
|
||||||
#if ENABLE_CPU && ENABLE_GPU
|
#ifdef WITH_BACKEND
|
||||||
const std::string ¶m_name = input_node->fullname_with_scope();
|
const std::string ¶m_name = input_node->fullname_with_scope();
|
||||||
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -438,7 +438,7 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
|
||||||
GraphKernelOptimize(graph);
|
GraphKernelOptimize(graph);
|
||||||
// Start gpu kernel runtime
|
// Start gpu kernel runtime
|
||||||
StartKernelRT();
|
StartKernelRT();
|
||||||
#if ENABLE_CPU && ENABLE_GPU
|
#ifdef WITH_BACKEND
|
||||||
InitPsWorker(graph);
|
InitPsWorker(graph);
|
||||||
#endif
|
#endif
|
||||||
// Assign CUDA streams
|
// Assign CUDA streams
|
||||||
|
@ -518,7 +518,7 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
|
||||||
E2eDump::UpdateIterOldRTDump(kernel_graph.get());
|
E2eDump::UpdateIterOldRTDump(kernel_graph.get());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ENABLE_CPU && ENABLE_GPU
|
#ifdef WITH_BACKEND
|
||||||
// Initialize parameter server
|
// Initialize parameter server
|
||||||
InitPSParamAndOptim(kernel_graph, inputs);
|
InitPSParamAndOptim(kernel_graph, inputs);
|
||||||
#endif
|
#endif
|
||||||
|
@ -555,7 +555,7 @@ void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph)
|
||||||
int kernel_num = kernel_graph->execution_order().size();
|
int kernel_num = kernel_graph->execution_order().size();
|
||||||
int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
|
int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
|
||||||
for (int64_t i = 0; i < loopsize; i++) {
|
for (int64_t i = 0; i < loopsize; i++) {
|
||||||
#if ENABLE_CPU && ENABLE_GPU
|
#ifdef WITH_BACKEND
|
||||||
std::string channel_name;
|
std::string channel_name;
|
||||||
if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
|
if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
|
||||||
ps::ps_cache_instance.IncreaseGraphStep(channel_name);
|
ps::ps_cache_instance.IncreaseGraphStep(channel_name);
|
||||||
|
@ -605,7 +605,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
|
||||||
// But one time memory application scenarios need to be skipped, because the memory is not allocated next step:
|
// But one time memory application scenarios need to be skipped, because the memory is not allocated next step:
|
||||||
// 1. Non cnode 2. Communication kernel.
|
// 1. Non cnode 2. Communication kernel.
|
||||||
bool ps_mode = false;
|
bool ps_mode = false;
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
ps_mode = ps::PSContext::instance()->is_ps_mode();
|
ps_mode = ps::PSContext::instance()->is_ps_mode();
|
||||||
#endif
|
#endif
|
||||||
if (node->isa<CNode>() && !common::AnfAlgo::IsCommunicationOp(node) && !ps_mode) {
|
if (node->isa<CNode>() && !common::AnfAlgo::IsCommunicationOp(node) && !ps_mode) {
|
||||||
|
|
|
@ -46,7 +46,7 @@
|
||||||
#include "utils/file_utils.h"
|
#include "utils/file_utils.h"
|
||||||
#include "utils/trace_base.h"
|
#include "utils/trace_base.h"
|
||||||
#include "include/common/utils/parallel_context.h"
|
#include "include/common/utils/parallel_context.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
#include "ps/ps_cache/ps_cache_manager.h"
|
||||||
#include "ps/constants.h"
|
#include "ps/constants.h"
|
||||||
#include "ps/util.h"
|
#include "ps/util.h"
|
||||||
|
@ -520,7 +520,7 @@ void SetReturnNode(const AnfNodePtr &node, KernelGraph *graph) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
// Get all users of this node
|
// Get all users of this node
|
||||||
void GetNodeUsedList(const FuncGraphPtr &kernel_graph, const AnfNodePtr &node,
|
void GetNodeUsedList(const FuncGraphPtr &kernel_graph, const AnfNodePtr &node,
|
||||||
std::vector<AnfNodePtr> *node_users_list) {
|
std::vector<AnfNodePtr> *node_users_list) {
|
||||||
|
@ -3010,7 +3010,7 @@ void SessionBasic::DumpGraphs(const std::vector<KernelGraphPtr> &graphs) {
|
||||||
|
|
||||||
void SessionBasic::UnifyMindIR(const KernelGraphPtr &graph) { opt::CommonUnifyMindIR(graph); }
|
void SessionBasic::UnifyMindIR(const KernelGraphPtr &graph) { opt::CommonUnifyMindIR(graph); }
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) {
|
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) {
|
||||||
if (!ps::PSContext::instance()->is_worker()) {
|
if (!ps::PSContext::instance()->is_worker()) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -346,7 +346,7 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
|
||||||
std::vector<uint32_t> GetAllReduceSplitIndex();
|
std::vector<uint32_t> GetAllReduceSplitIndex();
|
||||||
virtual std::string GetCommWorldGroup() { return std::string(); }
|
virtual std::string GetCommWorldGroup() { return std::string(); }
|
||||||
void DumpGraphs(const std::vector<KernelGraphPtr> &graphs);
|
void DumpGraphs(const std::vector<KernelGraphPtr> &graphs);
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
void CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const;
|
void CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const;
|
||||||
void GetBatchElements(const AnfNodePtr &kernel_node) const;
|
void GetBatchElements(const AnfNodePtr &kernel_node) const;
|
||||||
void InitPsWorker(const KernelGraphPtr &kernel_graph);
|
void InitPsWorker(const KernelGraphPtr &kernel_graph);
|
||||||
|
|
|
@ -40,16 +40,13 @@
|
||||||
#include "runtime/pynative/graph_adapter.h"
|
#include "runtime/pynative/graph_adapter.h"
|
||||||
#include "distributed/recovery/recovery_context.h"
|
#include "distributed/recovery/recovery_context.h"
|
||||||
#include "include/common/utils/scoped_long_running.h"
|
#include "include/common/utils/scoped_long_running.h"
|
||||||
#ifdef ENABLE_D
|
|
||||||
#include "include/common/utils/callbacks_ge.h"
|
|
||||||
#endif
|
|
||||||
#ifdef ENABLE_DEBUGGER
|
#ifdef ENABLE_DEBUGGER
|
||||||
#include "debug/debugger/debugger.h"
|
#include "debug/debugger/debugger.h"
|
||||||
#endif
|
#endif
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
#include "debug/data_dump/dump_json_parser.h"
|
#include "debug/data_dump/dump_json_parser.h"
|
||||||
#endif
|
#endif
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_context.h"
|
#include "ps/ps_context.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -352,7 +349,7 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const s
|
||||||
const session::SessionPtr &exe_session = ((target != target_device_ && !target.empty()) ? other_sess_ : target_sess_);
|
const session::SessionPtr &exe_session = ((target != target_device_ && !target.empty()) ? other_sess_ : target_sess_);
|
||||||
MS_EXCEPTION_IF_NULL(exe_session);
|
MS_EXCEPTION_IF_NULL(exe_session);
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
// If in PS mode, must use sync mode to run graph in case that the weights on server are not updated in the last step.
|
// If in PS mode, must use sync mode to run graph in case that the weights on server are not updated in the last step.
|
||||||
if (ps::PSContext::instance()->is_ps_mode()) {
|
if (ps::PSContext::instance()->is_ps_mode()) {
|
||||||
exe_session->RunGraph(g, inputs, &outputs);
|
exe_session->RunGraph(g, inputs, &outputs);
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#include "ir/graph_utils.h"
|
#include "ir/graph_utils.h"
|
||||||
#include "utils/ms_context.h"
|
#include "utils/ms_context.h"
|
||||||
#include "utils/trace_base.h"
|
#include "utils/trace_base.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_context.h"
|
#include "ps/ps_context.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -598,7 +598,7 @@ void SetMindRTEnable() {
|
||||||
auto context_ptr = MsContext::GetInstance();
|
auto context_ptr = MsContext::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined __APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
if (ps::PSContext::instance()->is_ps_mode() && !ps::PSContext::instance()->enable_distributed_mindrt()) {
|
if (ps::PSContext::instance()->is_ps_mode() && !ps::PSContext::instance()->enable_distributed_mindrt()) {
|
||||||
context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, false);
|
context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, false);
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -26,12 +26,7 @@
|
||||||
#include "kernel/akg/akg_kernel_json_generator.h"
|
#include "kernel/akg/akg_kernel_json_generator.h"
|
||||||
#include "common/graph_kernel/graph_kernel_helper.h"
|
#include "common/graph_kernel/graph_kernel_helper.h"
|
||||||
#include "common/graph_kernel/core/graph_kernel_utils.h"
|
#include "common/graph_kernel/core/graph_kernel_utils.h"
|
||||||
#if ENABLE_D
|
#include "kernel/akg/akg_kernel_build_manager.h"
|
||||||
#include "plugin/device/ascend/kernel/akg/akg_ascend_kernel_build.h"
|
|
||||||
#elif ENABLE_GPU
|
|
||||||
#include "plugin/device/gpu/kernel/akg/akg_gpu_kernel_build.h"
|
|
||||||
#endif
|
|
||||||
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
|
|
||||||
|
|
||||||
namespace mindspore::graphkernel {
|
namespace mindspore::graphkernel {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -151,21 +146,13 @@ void GraphKernelBuild::Init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init AkgKernelBuilder.
|
// Init AkgKernelBuilder.
|
||||||
#if ENABLE_D
|
if (Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
|
||||||
if (Callback::Instance()->GetTargetFromContext() == kCPUDevice) {
|
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kGPUDevice);
|
||||||
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
|
} else if (Callback::Instance()->GetTargetFromContext() == kAscendDevice) {
|
||||||
|
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kAscendDevice);
|
||||||
} else {
|
} else {
|
||||||
kernel_builder_ = std::make_shared<kernel::AkgAscendKernelBuilder>();
|
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kCPUDevice);
|
||||||
}
|
}
|
||||||
#elif ENABLE_GPU
|
|
||||||
if (Callback::Instance()->GetTargetFromContext() == kCPUDevice) {
|
|
||||||
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
|
|
||||||
} else {
|
|
||||||
kernel_builder_ = std::make_shared<kernel::AkgGpuKernelBuilder>();
|
|
||||||
}
|
|
||||||
#elif ENABLE_CPU
|
|
||||||
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GraphKernelBuild::Process(const FuncGraphPtr &func_graph, int iter) {
|
bool GraphKernelBuild::Process(const FuncGraphPtr &func_graph, int iter) {
|
||||||
|
|
|
@ -1767,7 +1767,6 @@ bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef ENABLE_D
|
|
||||||
/*
|
/*
|
||||||
* Feature group: Dump.
|
* Feature group: Dump.
|
||||||
* Target device group: Ascend.
|
* Target device group: Ascend.
|
||||||
|
@ -1805,6 +1804,5 @@ void Debugger::WaitForWriteFileFinished() {
|
||||||
recheck_cnt++;
|
recheck_cnt++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -27,9 +27,7 @@
|
||||||
#include "debug/debugger/grpc_client.h"
|
#include "debug/debugger/grpc_client.h"
|
||||||
#include "debug/debug_services.h"
|
#include "debug/debug_services.h"
|
||||||
#include "runtime/device/ms_device_shape_transfer.h"
|
#include "runtime/device/ms_device_shape_transfer.h"
|
||||||
#ifdef ENABLE_D
|
|
||||||
#include "debug/dump_data_builder.h"
|
#include "debug/dump_data_builder.h"
|
||||||
#endif
|
|
||||||
#include "runtime/device/device_address.h"
|
#include "runtime/device/device_address.h"
|
||||||
#include "include/backend/visible.h"
|
#include "include/backend/visible.h"
|
||||||
|
|
||||||
|
@ -197,13 +195,11 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
|
||||||
|
|
||||||
std::vector<AnfNodePtr> GetParametersMindRT() const { return parameters_mindRT_; }
|
std::vector<AnfNodePtr> GetParametersMindRT() const { return parameters_mindRT_; }
|
||||||
|
|
||||||
#ifdef ENABLE_D
|
|
||||||
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);
|
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);
|
||||||
|
|
||||||
void ClearDumpDataBuilder(const std::string &node_name);
|
void ClearDumpDataBuilder(const std::string &node_name);
|
||||||
|
|
||||||
void WaitForWriteFileFinished();
|
void WaitForWriteFileFinished();
|
||||||
#endif
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// private constructor for singleton
|
// private constructor for singleton
|
||||||
|
@ -331,10 +327,8 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
|
||||||
// map to store iter num in each epoch when dataset_sink_mode is true
|
// map to store iter num in each epoch when dataset_sink_mode is true
|
||||||
std::map<uint32_t, int32_t> graph_iter_num_map_;
|
std::map<uint32_t, int32_t> graph_iter_num_map_;
|
||||||
|
|
||||||
#ifdef ENABLE_D
|
|
||||||
// to construct kernel data for async dump, key is the dump path to the node
|
// to construct kernel data for async dump, key is the dump path to the node
|
||||||
std::map<std::string, std::shared_ptr<DumpDataBuilder>> dump_data_construct_map_;
|
std::map<std::string, std::shared_ptr<DumpDataBuilder>> dump_data_construct_map_;
|
||||||
#endif
|
|
||||||
|
|
||||||
// singleton
|
// singleton
|
||||||
inline static std::mutex instance_lock_ = {};
|
inline static std::mutex instance_lock_ = {};
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "distributed/collective/collective_manager.h"
|
#include "distributed/collective/collective_manager.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#include "distributed/cluster/cluster_context.h"
|
#include "distributed/cluster/cluster_context.h"
|
||||||
#else
|
#else
|
||||||
#include "distributed/cluster/dummy_cluster_context.h"
|
#include "distributed/cluster/dummy_cluster_context.h"
|
||||||
|
|
|
@ -14,6 +14,7 @@ endif()
|
||||||
if(ENABLE_AKG AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
if(ENABLE_AKG AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||||
file(GLOB_RECURSE AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
file(GLOB_RECURSE AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"akg/akg_kernel_build.cc"
|
"akg/akg_kernel_build.cc"
|
||||||
|
"akg/akg_kernel_build_manager.cc"
|
||||||
"akg/akg_kernel_json_generator.cc"
|
"akg/akg_kernel_json_generator.cc"
|
||||||
"akg/akg_kernel_json_decoder.cc"
|
"akg/akg_kernel_json_decoder.cc"
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "kernel/akg/akg_kernel_build_manager.h"
|
||||||
|
#include <memory>
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
// Returns the process-wide manager. The object is a function-local static, so
// it is constructed on first use.
AkgKernelBuildManager &AkgKernelBuildManager::Instance() {
  static AkgKernelBuildManager manager;
  return manager;
}
|
||||||
|
|
||||||
|
void AkgKernelBuildManager::Register(const std::string &device_type, AkgKernelBuildCreator &&creator) {
|
||||||
|
if (base_map_.find(device_type) == base_map_.end()) {
|
||||||
|
(void)base_map_.emplace(device_type, creator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<AkgKernelBuilder> AkgKernelBuildManager::GetAkgKernelBuilder(const std::string &device_type) {
|
||||||
|
auto iter = base_map_.find(device_type);
|
||||||
|
if (base_map_.end() != iter) {
|
||||||
|
MS_EXCEPTION_IF_NULL(iter->second);
|
||||||
|
return (iter->second)();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,53 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_
|
||||||
|
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_
|
||||||
|
#include "kernel/akg/akg_kernel_build.h"
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
using AkgKernelBuildCreator = std::function<std::shared_ptr<AkgKernelBuilder>()>;
|
||||||
|
|
||||||
|
class AkgKernelBuildManager {
|
||||||
|
public:
|
||||||
|
static AkgKernelBuildManager &Instance();
|
||||||
|
void Register(const std::string &device_type, AkgKernelBuildCreator &&creator);
|
||||||
|
std::shared_ptr<AkgKernelBuilder> GetAkgKernelBuilder(const std::string &device_type);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::map<std::string, AkgKernelBuildCreator> base_map_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class AkgKernelBuildRegister {
|
||||||
|
public:
|
||||||
|
AkgKernelBuildRegister(const std::string &device_type, AkgKernelBuildCreator &&creator) {
|
||||||
|
AkgKernelBuildManager::Instance().Register(device_type, std::move(creator));
|
||||||
|
}
|
||||||
|
~AkgKernelBuildRegister() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Declares a static AkgKernelBuildRegister that registers BUILDER_CLASS as the
// AKG kernel builder for DEVICE_TYPE; the creator builds it with
// std::make_shared<BUILDER_CLASS>().
//
// DEVICE_TYPE must be a single identifier (for example kAscendDevice) because
// it is pasted into the name of the generated variable. The generated name uses
// a single underscore before "reg": identifiers containing "__" are reserved.
// The expansion already ends with ';'.
#define REG_AKG_KERNEL_BUILDER(DEVICE_TYPE, BUILDER_CLASS)                      \
  static const AkgKernelBuildRegister g_akg_kernel_builder_##DEVICE_TYPE##_reg( \
    DEVICE_TYPE, []() { return std::make_shared<BUILDER_CLASS>(); });
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_
|
|
@ -245,12 +245,15 @@ bool AscendKernelRuntime::NeedDestroyHccl() {
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
void AsyncDataDumpUninit() {
|
void AsyncDataDumpUninit() {
|
||||||
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
|
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
|
||||||
#if ENABLE_D
|
auto ms_context = MsContext::GetInstance();
|
||||||
// When it is A+M dump mode, wait until file save is finished.
|
MS_EXCEPTION_IF_NULL(ms_context);
|
||||||
if (DumpJsonParser::GetInstance().FileFormatIsNpy()) {
|
auto device_type = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||||
Debugger::GetInstance()->WaitForWriteFileFinished();
|
if (device_type == kAscendDevice) {
|
||||||
|
// When it is A+M dump mode, wait until file save is finished.
|
||||||
|
if (DumpJsonParser::GetInstance().FileFormatIsNpy()) {
|
||||||
|
Debugger::GetInstance()->WaitForWriteFileFinished();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
if (AdxDataDumpServerUnInit() != 0) {
|
if (AdxDataDumpServerUnInit() != 0) {
|
||||||
MS_LOG(ERROR) << "Adx data dump server uninit failed";
|
MS_LOG(ERROR) << "Adx data dump server uninit failed";
|
||||||
}
|
}
|
||||||
|
|
|
@ -240,7 +240,7 @@ void DumpInit(uint32_t device_id) {
|
||||||
json_parser.CopyHcclJsonToDir(device_id);
|
json_parser.CopyHcclJsonToDir(device_id);
|
||||||
json_parser.CopyMSCfgJsonToDir(device_id);
|
json_parser.CopyMSCfgJsonToDir(device_id);
|
||||||
if (json_parser.async_dump_enabled()) {
|
if (json_parser.async_dump_enabled()) {
|
||||||
#ifdef ENABLE_D
|
#if !(defined(ENABLE_TEST) || defined(ENABLE_TESTCASES))
|
||||||
// register callback to adx
|
// register callback to adx
|
||||||
if (json_parser.FileFormatIsNpy()) {
|
if (json_parser.FileFormatIsNpy()) {
|
||||||
AdxRegDumpProcessCallBack(DumpDataCallBack);
|
AdxRegDumpProcessCallBack(DumpDataCallBack);
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include "ir/anf.h"
|
#include "ir/anf.h"
|
||||||
#include "kernel/akg/akg_kernel_build.h"
|
#include "kernel/akg/akg_kernel_build.h"
|
||||||
|
#include "kernel/akg/akg_kernel_build_manager.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace kernel {
|
namespace kernel {
|
||||||
|
@ -39,6 +40,8 @@ class AkgAscendKernelBuilder : public AkgKernelBuilder {
|
||||||
const AnfNodePtr &anf_node) override;
|
const AnfNodePtr &anf_node) override;
|
||||||
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
REG_AKG_KERNEL_BUILDER(kAscendDevice, AkgAscendKernelBuilder);
|
||||||
} // namespace kernel
|
} // namespace kernel
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,9 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
|
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
|
||||||
#include "plugin/device/cpu/hal/device/cpu_memory_manager.h"
|
#include "plugin/device/cpu/hal/device/cpu_memory_manager.h"
|
||||||
|
#ifdef ENABLE_AKG
|
||||||
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
|
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
|
||||||
|
#endif
|
||||||
#include "plugin/factory/ms_factory.h"
|
#include "plugin/factory/ms_factory.h"
|
||||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||||
#include "kernel/kernel_build_info.h"
|
#include "kernel/kernel_build_info.h"
|
||||||
|
@ -40,7 +42,7 @@
|
||||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||||
#include "include/common/utils/anfalgo.h"
|
#include "include/common/utils/anfalgo.h"
|
||||||
#include "profiler/device/cpu/cpu_profiling.h"
|
#include "profiler/device/cpu/cpu_profiling.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
|
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
|
||||||
#endif
|
#endif
|
||||||
#ifndef ENABLE_SECURITY
|
#ifndef ENABLE_SECURITY
|
||||||
|
@ -373,7 +375,7 @@ bool CPUDeviceContext::LoadCollectiveCommLib() {
|
||||||
collective_comm_lib_ = instance_func();
|
collective_comm_lib_ = instance_func();
|
||||||
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
|
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
|
||||||
} else {
|
} else {
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
collective_comm_lib_ = &MsCollectiveCommLib::GetInstance();
|
collective_comm_lib_ = &MsCollectiveCommLib::GetInstance();
|
||||||
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
|
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
|
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "kernel/akg/akg_kernel_build.h"
|
#include "kernel/akg/akg_kernel_build.h"
|
||||||
|
#include "kernel/akg/akg_kernel_build_manager.h"
|
||||||
#include "base/base.h"
|
#include "base/base.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -32,6 +33,7 @@ class AkgCpuKernelBuilder : public AkgKernelBuilder {
|
||||||
const AnfNodePtr &anf_node) override;
|
const AnfNodePtr &anf_node) override;
|
||||||
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
||||||
};
|
};
|
||||||
|
REG_AKG_KERNEL_BUILDER(kCPUDevice, AkgCpuKernelBuilder);
|
||||||
} // namespace kernel
|
} // namespace kernel
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
|
||||||
|
|
|
@ -20,13 +20,13 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
|
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace kernel {
|
namespace kernel {
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
using device::CollectiveOpReduceType::Reduce_Sum;
|
using device::CollectiveOpReduceType::Reduce_Sum;
|
||||||
using device::cpu::kMCCLGlobalGroupName;
|
using device::cpu::kMCCLGlobalGroupName;
|
||||||
using device::cpu::MsCollectiveCommLib;
|
using device::cpu::MsCollectiveCommLib;
|
||||||
|
@ -37,7 +37,7 @@ constexpr char kSupportedReduceOp[] = "sum";
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void AllReduceCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
void AllReduceCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||||
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
|
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
|
||||||
|
@ -67,7 +67,7 @@ std::vector<KernelAttr> AllReduceCPUKernelMod::GetOpSupport() {
|
||||||
bool AllReduceCPUKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
bool AllReduceCPUKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||||
const std::vector<kernel::AddressPtr> &,
|
const std::vector<kernel::AddressPtr> &,
|
||||||
const std::vector<kernel::AddressPtr> &outputs) {
|
const std::vector<kernel::AddressPtr> &outputs) {
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
if (inputs.empty() || outputs.empty()) {
|
if (inputs.empty() || outputs.empty()) {
|
||||||
MS_LOG(EXCEPTION) << kernel_name_ << " has at least one input and one output, but got 0.";
|
MS_LOG(EXCEPTION) << kernel_name_ << " has at least one input and one output, but got 0.";
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
|
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "kernel/akg/akg_kernel_build.h"
|
#include "kernel/akg/akg_kernel_build.h"
|
||||||
|
#include "kernel/akg/akg_kernel_build_manager.h"
|
||||||
#include "base/base.h"
|
#include "base/base.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -33,6 +34,7 @@ class AkgGpuKernelBuilder : public AkgKernelBuilder {
|
||||||
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
REG_AKG_KERNEL_BUILDER(kGPUDevice, AkgGpuKernelBuilder);
|
||||||
} // namespace kernel
|
} // namespace kernel
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
#include "include/common/utils/parallel_context.h"
|
#include "include/common/utils/parallel_context.h"
|
||||||
#include "include/common/debug/env_config_parser.h"
|
#include "include/common/debug/env_config_parser.h"
|
||||||
#include "plugin/device/ascend/hal/device/ascend_device_address.h"
|
#include "plugin/device/ascend/hal/device/ascend_device_address.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
#include "ps/ps_cache/ps_cache_manager.h"
|
||||||
#endif
|
#endif
|
||||||
#include "kernel/common_utils.h"
|
#include "kernel/common_utils.h"
|
||||||
|
@ -600,7 +600,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
|
||||||
}
|
}
|
||||||
add_need_alloc_nodes(input_node);
|
add_need_alloc_nodes(input_node);
|
||||||
}
|
}
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
bool ps_cache_check = false;
|
bool ps_cache_check = false;
|
||||||
#endif
|
#endif
|
||||||
std::map<AnfNodePtr, AnfNodePtr> shadow_backend_node_map;
|
std::map<AnfNodePtr, AnfNodePtr> shadow_backend_node_map;
|
||||||
|
@ -616,7 +616,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DeviceAddressPtr device_address = GetInternalDeviceAddress(graph, item);
|
DeviceAddressPtr device_address = GetInternalDeviceAddress(graph, item);
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
const std::string ¶m_name = item->fullname_with_scope();
|
const std::string ¶m_name = item->fullname_with_scope();
|
||||||
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
if (ps::ps_cache_instance.IsHashTable(param_name)) {
|
||||||
MS_LOG(INFO) << "Parameter(" << param_name << ")"
|
MS_LOG(INFO) << "Parameter(" << param_name << ")"
|
||||||
|
@ -1833,7 +1833,7 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
|
||||||
MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource";
|
MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource";
|
||||||
}
|
}
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
namespace {
|
namespace {
|
||||||
// Finalize ps cache module before throw an exception.
|
// Finalize ps cache module before throw an exception.
|
||||||
void FinalizePsCache(const std::string &exception) {
|
void FinalizePsCache(const std::string &exception) {
|
||||||
|
|
|
@ -193,7 +193,7 @@ class KernelRuntime {
|
||||||
void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph);
|
void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph);
|
||||||
void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx);
|
void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx);
|
||||||
DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const;
|
DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const;
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index,
|
void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index,
|
||||||
size_t *const first_cache_size);
|
size_t *const first_cache_size);
|
||||||
void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph);
|
void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph);
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
#include "runtime/device/kernel_runtime_manager.h"
|
#include "runtime/device/kernel_runtime_manager.h"
|
||||||
#include "utils/log_adapter.h"
|
#include "utils/log_adapter.h"
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_cache/ps_cache_manager.h"
|
#include "ps/ps_cache/ps_cache_manager.h"
|
||||||
#endif
|
#endif
|
||||||
#include "backend/common/session/pynative_task_manager.h"
|
#include "backend/common/session/pynative_task_manager.h"
|
||||||
|
@ -26,7 +26,7 @@ namespace device {
|
||||||
void KernelRuntimeManager::ClearRuntimeResource() {
|
void KernelRuntimeManager::ClearRuntimeResource() {
|
||||||
// Just remove PyNative tasks before runtime resource release.
|
// Just remove PyNative tasks before runtime resource release.
|
||||||
session::PynativeTaskManager::GetInstance().Reset();
|
session::PynativeTaskManager::GetInstance().Reset();
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
|
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
|
||||||
ps::ps_cache_instance.SyncEmbeddingTable();
|
ps::ps_cache_instance.SyncEmbeddingTable();
|
||||||
}
|
}
|
||||||
|
@ -132,7 +132,7 @@ void KernelRuntimeManager::ReleaseKernelRuntime(const std::string &device_name,
|
||||||
if (runtime == nullptr) {
|
if (runtime == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
#ifdef WITH_BACKEND
|
||||||
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
|
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
|
||||||
ps::ps_cache_instance.SyncEmbeddingTable();
|
ps::ps_cache_instance.SyncEmbeddingTable();
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
|
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
|
||||||
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
|
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined __APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
#define ENABLE_RPC_ACTOR
|
#define ENABLE_RPC_ACTOR
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -172,7 +172,7 @@ void IntHandler(int, siginfo_t *, void *) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
bool SendFinishTransform(const std::string &actor_set_name) {
|
bool SendFinishTransform(const std::string &actor_set_name) {
|
||||||
auto node = ClusterContext::instance()->node();
|
auto node = ClusterContext::instance()->node();
|
||||||
MS_EXCEPTION_IF_NULL(node);
|
MS_EXCEPTION_IF_NULL(node);
|
||||||
|
@ -502,7 +502,7 @@ ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info
|
||||||
Optimize(actor_set);
|
Optimize(actor_set);
|
||||||
MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor end.";
|
MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor end.";
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
if (ClusterContext::instance()->initialized() && RecoveryContext::GetInstance()->enable_recovery()) {
|
if (ClusterContext::instance()->initialized() && RecoveryContext::GetInstance()->enable_recovery()) {
|
||||||
while (!SendFinishTransform(graph_compiler_info.name_)) {
|
while (!SendFinishTransform(graph_compiler_info.name_)) {
|
||||||
MS_LOG(WARNING) << "Send finish transform graph failed.";
|
MS_LOG(WARNING) << "Send finish transform graph failed.";
|
||||||
|
@ -599,7 +599,7 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
|
||||||
const size_t kSecondsToMilliseconds = 1000;
|
const size_t kSecondsToMilliseconds = 1000;
|
||||||
SetActorExecutionStrategy(actor_set, strategy, (end_time - start_time) * kSecondsToMilliseconds);
|
SetActorExecutionStrategy(actor_set, strategy, (end_time - start_time) * kSecondsToMilliseconds);
|
||||||
|
|
||||||
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
|
#ifdef WITH_BACKEND
|
||||||
DoDisasterRecovery(actor_set->name_);
|
DoDisasterRecovery(actor_set->name_);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue