!35520 backend compiled macro

Merge pull request !35520 from liubuyu/backend
This commit is contained in:
i-robot 2022-06-13 01:34:48 +00:00 committed by Gitee
commit 758dff008d
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
28 changed files with 173 additions and 88 deletions

View File

@ -123,7 +123,7 @@ if(ENABLE_TESTCASES OR (NOT ENABLE_D))
endif()
if(NOT (ENABLE_TESTCASES OR ENABLE_TEST) AND NOT (CMAKE_SYSTEM_NAME MATCHES "Windows" OR
CMAKE_SYSTEM_NAME MATCHES "Darwin"))
CMAKE_SYSTEM_NAME MATCHES "Darwin") AND (ENABLE_D OR ENABLE_GPU OR ENABLE_CPU))
add_compile_definitions(WITH_BACKEND)
endif()

View File

@ -69,7 +69,7 @@
#include "include/common/debug/rdr/recorder_manager.h"
#include "debug/rdr/graph_recorder.h"
#endif
#if ENABLE_CPU && ENABLE_D
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#endif
@ -247,7 +247,7 @@ bool TensorNeedSync(const std::shared_ptr<KernelGraph> &kernel_graph, const AnfN
}
MS_EXCEPTION_IF_NULL(memcpy_nums);
(*memcpy_nums)++;
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
const std::string &param_name = parameter->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
return false;
@ -346,7 +346,7 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_gra
}
if (AnfAlgo::OutputAddrExist(input_node, 0) &&
TensorNeedSync(kernel_graph, input_node, tensor, &device_memcpy_nums)) {
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
const std::string &param_name = input_node->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
continue;
@ -459,7 +459,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
// adjust kernel
AdjustKernel(root_graph);
#if ENABLE_CPU && ENABLE_D
#ifdef WITH_BACKEND
InitPsWorker(root_graph);
#endif
// assign stream
@ -538,7 +538,7 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
single_graph->UpdateExecuteKernelStreamLabel();
// adjust execution order because merge child graph and other special operations
AdjustKernel(graph);
#if ENABLE_CPU && ENABLE_D
#ifdef WITH_BACKEND
InitPsWorker(graph);
#endif
// Assign streams for control sink and hccl and so on
@ -616,7 +616,7 @@ void AscendSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_g
debugger_->PreExecute(kernel_graph);
}
#endif
#if ENABLE_CPU && ENABLE_D
#ifdef WITH_BACKEND
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
std::string channel_name;
@ -1000,17 +1000,19 @@ void AscendSession::BuildOpsInGraph(const GraphId &graph_id, const std::map<AnfN
}
#ifndef ENABLE_SECURITY
void DumpInit(uint32_t device_id) {
void DumpInit(const std::string &device_type, uint32_t device_id) {
auto &json_parser = DumpJsonParser::GetInstance();
json_parser.Parse();
json_parser.CopyDumpJsonToDir(device_id);
json_parser.CopyHcclJsonToDir(device_id);
json_parser.CopyMSCfgJsonToDir(device_id);
if (json_parser.async_dump_enabled()) {
#ifdef ENABLE_D
// register callback to adx
if (json_parser.FileFormatIsNpy()) {
AdxRegDumpProcessCallBack(DumpDataCallBack);
#if !(defined(ENABLE_TEST) || defined(ENABLE_TESTCASES))
if (device_type == kAscendDevice) {
// register callback to adx
if (json_parser.FileFormatIsNpy()) {
AdxRegDumpProcessCallBack(DumpDataCallBack);
}
}
#endif
if (AdxDataDumpServerInit() != 0) {
@ -1035,7 +1037,8 @@ void AscendSession::InitRuntimeResource() {
rank_id_ = GetRankId();
}
#ifndef ENABLE_SECURITY
DumpInit(rank_id_);
auto device_type = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
DumpInit(device_type, rank_id_);
#endif
MS_LOG(INFO) << "Status record: end init runtime resource.";
}

View File

@ -27,7 +27,9 @@
#include "plugin/factory/ms_factory.h"
#include "runtime/device/kernel_runtime.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#ifdef ENABLE_AKG
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
#endif
#include "plugin/device/cpu/hal/device/kernel_select_cpu.h"
#include "backend/common/optimizer/optimizer.h"
#include "backend/common/optimizer/pass_manager.h"
@ -42,7 +44,7 @@
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_context.h"
#endif
@ -87,7 +89,7 @@ void CPUSession::Reorder(std::vector<CNodePtr> *node_list) {
void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>();
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode && ps::PSContext::instance()->is_ps_mode()) {
@ -195,7 +197,7 @@ void CPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
MS_LOG(INFO) << "Bind input output address";
runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
InitPSParamAndOptim(kernel_graph, inputs);
#endif
}

View File

@ -22,9 +22,6 @@
#include "include/common/utils/comm_manager.h"
#include "include/common/utils/scoped_long_running.h"
#include "pybind_api/ir/tensor_py.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#include "ps/ps_cache/ps_cache_manager.h"
#endif
using mindspore::tensor::TensorPy;
namespace mindspore {

View File

@ -85,7 +85,7 @@
#include "common/graph_kernel/graph_kernel_flags.h"
#include "include/common/utils/utils.h"
#include "abstract/utils.h"
#if ENABLE_CPU && ENABLE_GPU
#ifdef WITH_BACKEND
#include "ps/util.h"
#include "ps/ps_cache/ps_cache_manager.h"
#endif
@ -358,7 +358,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
auto input_node = input_nodes[i];
MS_EXCEPTION_IF_NULL(input_node);
if (input_node->isa<Parameter>() && AnfAlgo::OutputAddrExist(input_node, 0)) {
#if ENABLE_CPU && ENABLE_GPU
#ifdef WITH_BACKEND
const std::string &param_name = input_node->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
continue;
@ -438,7 +438,7 @@ GraphId GPUSession::CompileGraphImpl(const KernelGraphPtr &graph) {
GraphKernelOptimize(graph);
// Start gpu kernel runtime
StartKernelRT();
#if ENABLE_CPU && ENABLE_GPU
#ifdef WITH_BACKEND
InitPsWorker(graph);
#endif
// Assign CUDA streams
@ -518,7 +518,7 @@ void GPUSession::PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_grap
E2eDump::UpdateIterOldRTDump(kernel_graph.get());
#endif
#if ENABLE_CPU && ENABLE_GPU
#ifdef WITH_BACKEND
// Initialize parameter server
InitPSParamAndOptim(kernel_graph, inputs);
#endif
@ -555,7 +555,7 @@ void GPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph)
int kernel_num = kernel_graph->execution_order().size();
int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
for (int64_t i = 0; i < loopsize; i++) {
#if ENABLE_CPU && ENABLE_GPU
#ifdef WITH_BACKEND
std::string channel_name;
if (ps::PsDataPrefetch::GetInstance().cache_enable() && IsGetNextGraph(kernel_graph, &channel_name)) {
ps::ps_cache_instance.IncreaseGraphStep(channel_name);
@ -605,7 +605,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
// But one time memory application scenarios need to be skipped, because the memory is not allocated next step:
// 1. Non cnode 2. Communication kernel.
bool ps_mode = false;
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
ps_mode = ps::PSContext::instance()->is_ps_mode();
#endif
if (node->isa<CNode>() && !common::AnfAlgo::IsCommunicationOp(node) && !ps_mode) {

View File

@ -46,7 +46,7 @@
#include "utils/file_utils.h"
#include "utils/trace_base.h"
#include "include/common/utils/parallel_context.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#include "ps/constants.h"
#include "ps/util.h"
@ -520,7 +520,7 @@ void SetReturnNode(const AnfNodePtr &node, KernelGraph *graph) {
}
}
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
// Get all users of this node
void GetNodeUsedList(const FuncGraphPtr &kernel_graph, const AnfNodePtr &node,
std::vector<AnfNodePtr> *node_users_list) {
@ -3010,7 +3010,7 @@ void SessionBasic::DumpGraphs(const std::vector<KernelGraphPtr> &graphs) {
void SessionBasic::UnifyMindIR(const KernelGraphPtr &graph) { opt::CommonUnifyMindIR(graph); }
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
void SessionBasic::InitPsWorker(const KernelGraphPtr &kernel_graph) {
if (!ps::PSContext::instance()->is_worker()) {
return;

View File

@ -346,7 +346,7 @@ class BACKEND_EXPORT SessionBasic : public std::enable_shared_from_this<SessionB
std::vector<uint32_t> GetAllReduceSplitIndex();
virtual std::string GetCommWorldGroup() { return std::string(); }
void DumpGraphs(const std::vector<KernelGraphPtr> &graphs);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
void CheckPSModeConsistence(const KernelGraphPtr &kernel_graph) const;
void GetBatchElements(const AnfNodePtr &kernel_node) const;
void InitPsWorker(const KernelGraphPtr &kernel_graph);

View File

@ -40,16 +40,13 @@
#include "runtime/pynative/graph_adapter.h"
#include "distributed/recovery/recovery_context.h"
#include "include/common/utils/scoped_long_running.h"
#ifdef ENABLE_D
#include "include/common/utils/callbacks_ge.h"
#endif
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
#include "ps/ps_context.h"
#endif
@ -352,7 +349,7 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const s
const session::SessionPtr &exe_session = ((target != target_device_ && !target.empty()) ? other_sess_ : target_sess_);
MS_EXCEPTION_IF_NULL(exe_session);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
// If in PS mode, must use sync mode to run graph in case that the weights on server are not updated in the last step.
if (ps::PSContext::instance()->is_ps_mode()) {
exe_session->RunGraph(g, inputs, &outputs);

View File

@ -28,7 +28,7 @@
#include "ir/graph_utils.h"
#include "utils/ms_context.h"
#include "utils/trace_base.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
#include "ps/ps_context.h"
#endif
@ -598,7 +598,7 @@ void SetMindRTEnable() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined __APPLE__))
#ifdef WITH_BACKEND
if (ps::PSContext::instance()->is_ps_mode() && !ps::PSContext::instance()->enable_distributed_mindrt()) {
context_ptr->set_param<bool>(MS_CTX_ENABLE_MINDRT, false);
return;

View File

@ -26,12 +26,7 @@
#include "kernel/akg/akg_kernel_json_generator.h"
#include "common/graph_kernel/graph_kernel_helper.h"
#include "common/graph_kernel/core/graph_kernel_utils.h"
#if ENABLE_D
#include "plugin/device/ascend/kernel/akg/akg_ascend_kernel_build.h"
#elif ENABLE_GPU
#include "plugin/device/gpu/kernel/akg/akg_gpu_kernel_build.h"
#endif
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
#include "kernel/akg/akg_kernel_build_manager.h"
namespace mindspore::graphkernel {
namespace {
@ -151,21 +146,13 @@ void GraphKernelBuild::Init() {
}
// Init AkgKernelBuilder.
#if ENABLE_D
if (Callback::Instance()->GetTargetFromContext() == kCPUDevice) {
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
if (Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kGPUDevice);
} else if (Callback::Instance()->GetTargetFromContext() == kAscendDevice) {
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kAscendDevice);
} else {
kernel_builder_ = std::make_shared<kernel::AkgAscendKernelBuilder>();
kernel_builder_ = kernel::AkgKernelBuildManager::Instance().GetAkgKernelBuilder(kCPUDevice);
}
#elif ENABLE_GPU
if (Callback::Instance()->GetTargetFromContext() == kCPUDevice) {
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
} else {
kernel_builder_ = std::make_shared<kernel::AkgGpuKernelBuilder>();
}
#elif ENABLE_CPU
kernel_builder_ = std::make_shared<kernel::AkgCpuKernelBuilder>();
#endif
}
bool GraphKernelBuild::Process(const FuncGraphPtr &func_graph, int iter) {

View File

@ -1767,7 +1767,6 @@ bool Debugger::TensorExistsInCurrent(const std::string &tensor_name) {
return false;
}
#ifdef ENABLE_D
/*
* Feature group: Dump.
* Target device group: Ascend.
@ -1805,6 +1804,5 @@ void Debugger::WaitForWriteFileFinished() {
recheck_cnt++;
}
}
#endif
} // namespace mindspore

View File

@ -27,9 +27,7 @@
#include "debug/debugger/grpc_client.h"
#include "debug/debug_services.h"
#include "runtime/device/ms_device_shape_transfer.h"
#ifdef ENABLE_D
#include "debug/dump_data_builder.h"
#endif
#include "runtime/device/device_address.h"
#include "include/backend/visible.h"
@ -197,13 +195,11 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
std::vector<AnfNodePtr> GetParametersMindRT() const { return parameters_mindRT_; }
#ifdef ENABLE_D
std::shared_ptr<DumpDataBuilder> LoadDumpDataBuilder(const std::string &node_name);
void ClearDumpDataBuilder(const std::string &node_name);
void WaitForWriteFileFinished();
#endif
private:
// private constructor for singleton
@ -331,10 +327,8 @@ class BACKEND_EXPORT Debugger : public std::enable_shared_from_this<Debugger> {
// map to store iter num in each epoch when dataset_sink_mode is true
std::map<uint32_t, int32_t> graph_iter_num_map_;
#ifdef ENABLE_D
// to construct kernel data for async dump, key is the dump path to the node
std::map<std::string, std::shared_ptr<DumpDataBuilder>> dump_data_construct_map_;
#endif
// singleton
inline static std::mutex instance_lock_ = {};

View File

@ -20,7 +20,7 @@
#include <vector>
#include <string>
#include "distributed/collective/collective_manager.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
#include "distributed/cluster/cluster_context.h"
#else
#include "distributed/cluster/dummy_cluster_context.h"

View File

@ -14,6 +14,7 @@ endif()
if(ENABLE_AKG AND ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
file(GLOB_RECURSE AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"akg/akg_kernel_build.cc"
"akg/akg_kernel_build_manager.cc"
"akg/akg_kernel_json_generator.cc"
"akg/akg_kernel_json_decoder.cc"
)

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/akg/akg_kernel_build_manager.h"
#include <memory>
#include <utility>
namespace mindspore {
namespace kernel {
// Returns the process-wide registry instance.
// Meyers singleton: function-local static, so construction is lazy and
// thread-safe (guaranteed since C++11).
AkgKernelBuildManager &AkgKernelBuildManager::Instance() {
  static AkgKernelBuildManager self;
  return self;
}
// Registers `creator` as the builder factory for `device_type`.
// First registration wins: a later registration for the same device type is
// silently ignored. The find() guard also guarantees `creator` is only
// consumed when the insertion actually happens.
void AkgKernelBuildManager::Register(const std::string &device_type, AkgKernelBuildCreator &&creator) {
  if (base_map_.find(device_type) == base_map_.end()) {
    // `creator` is taken by rvalue reference; move it into the map instead of
    // copying (the original copy defeated the purpose of the && parameter).
    (void)base_map_.emplace(device_type, std::move(creator));
  }
}
// Looks up the factory registered for `device_type` and invokes it to create
// a builder. Returns nullptr when no factory was registered for that key.
std::shared_ptr<AkgKernelBuilder> AkgKernelBuildManager::GetAkgKernelBuilder(const std::string &device_type) {
  const auto it = base_map_.find(device_type);
  if (it == base_map_.end()) {
    return nullptr;
  }
  MS_EXCEPTION_IF_NULL(it->second);
  return it->second();
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,53 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_
#include "kernel/akg/akg_kernel_build.h"
#include <map>
#include <utility>
#include <memory>
#include <string>
namespace mindspore {
namespace kernel {
using AkgKernelBuildCreator = std::function<std::shared_ptr<AkgKernelBuilder>()>;
// Process-wide registry mapping a device-type string (e.g. kCPUDevice,
// kGPUDevice, kAscendDevice) to a factory that creates the corresponding
// AkgKernelBuilder. Builders register themselves via REG_AKG_KERNEL_BUILDER;
// definitions live in akg_kernel_build_manager.cc.
class AkgKernelBuildManager {
 public:
  // Returns the singleton instance.
  static AkgKernelBuildManager &Instance();
  // Registers `creator` for `device_type`; the first registration for a key
  // wins and later ones for the same device type are ignored.
  void Register(const std::string &device_type, AkgKernelBuildCreator &&creator);
  // Invokes the factory registered for `device_type`; returns nullptr when no
  // factory has been registered for that key.
  std::shared_ptr<AkgKernelBuilder> GetAkgKernelBuilder(const std::string &device_type);

 private:
  // device-type string -> builder factory.
  std::map<std::string, AkgKernelBuildCreator> base_map_;
};
// RAII-style registration helper: constructing one registers `creator` with
// the AkgKernelBuildManager singleton. Intended to be instantiated as a
// file-scope static through the REG_AKG_KERNEL_BUILDER macro below.
class AkgKernelBuildRegister {
 public:
  AkgKernelBuildRegister(const std::string &device_type, AkgKernelBuildCreator &&creator) {
    AkgKernelBuildManager::Instance().Register(device_type, std::move(creator));
  }
  ~AkgKernelBuildRegister() = default;
};

// Registers BUILDER_CLASS as the AKG kernel builder for DEVICE_TYPE during
// static initialization of the translation unit that expands this macro.
#define REG_AKG_KERNEL_BUILDER(DEVICE_TYPE, BUILDER_CLASS)                         \
  static const AkgKernelBuildRegister g_akg_kernel_builder_##DEVICE_TYPE##_##_reg( \
    DEVICE_TYPE, []() { return std::make_shared<BUILDER_CLASS>(); });
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_MANAGER_H_

View File

@ -245,12 +245,15 @@ bool AscendKernelRuntime::NeedDestroyHccl() {
#ifndef ENABLE_SECURITY
void AsyncDataDumpUninit() {
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
#if ENABLE_D
// When it is A+M dump mode, wait until file save is finished.
if (DumpJsonParser::GetInstance().FileFormatIsNpy()) {
Debugger::GetInstance()->WaitForWriteFileFinished();
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_type = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_type == kAscendDevice) {
// When it is A+M dump mode, wait until file save is finished.
if (DumpJsonParser::GetInstance().FileFormatIsNpy()) {
Debugger::GetInstance()->WaitForWriteFileFinished();
}
}
#endif
if (AdxDataDumpServerUnInit() != 0) {
MS_LOG(ERROR) << "Adx data dump server uninit failed";
}

View File

@ -240,7 +240,7 @@ void DumpInit(uint32_t device_id) {
json_parser.CopyHcclJsonToDir(device_id);
json_parser.CopyMSCfgJsonToDir(device_id);
if (json_parser.async_dump_enabled()) {
#ifdef ENABLE_D
#if !(defined(ENABLE_TEST) || defined(ENABLE_TESTCASES))
// register callback to adx
if (json_parser.FileFormatIsNpy()) {
AdxRegDumpProcessCallBack(DumpDataCallBack);

View File

@ -23,6 +23,7 @@
#include <map>
#include "ir/anf.h"
#include "kernel/akg/akg_kernel_build.h"
#include "kernel/akg/akg_kernel_build_manager.h"
namespace mindspore {
namespace kernel {
@ -39,6 +40,8 @@ class AkgAscendKernelBuilder : public AkgKernelBuilder {
const AnfNodePtr &anf_node) override;
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
};
REG_AKG_KERNEL_BUILDER(kAscendDevice, AkgAscendKernelBuilder);
} // namespace kernel
} // namespace mindspore

View File

@ -18,7 +18,9 @@
#include <string>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "plugin/device/cpu/hal/device/cpu_memory_manager.h"
#ifdef ENABLE_AKG
#include "plugin/device/cpu/kernel/akg/akg_cpu_kernel_build.h"
#endif
#include "plugin/factory/ms_factory.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "kernel/kernel_build_info.h"
@ -40,7 +42,7 @@
#include "backend/common/session/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "profiler/device/cpu/cpu_profiling.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
#endif
#ifndef ENABLE_SECURITY
@ -373,7 +375,7 @@ bool CPUDeviceContext::LoadCollectiveCommLib() {
collective_comm_lib_ = instance_func();
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
} else {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
collective_comm_lib_ = &MsCollectiveCommLib::GetInstance();
MS_EXCEPTION_IF_NULL(collective_comm_lib_);
#endif

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_
#include <string>
#include "kernel/akg/akg_kernel_build.h"
#include "kernel/akg/akg_kernel_build_manager.h"
#include "base/base.h"
namespace mindspore {
@ -32,6 +33,7 @@ class AkgCpuKernelBuilder : public AkgKernelBuilder {
const AnfNodePtr &anf_node) override;
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
};
REG_AKG_KERNEL_BUILDER(kCPUDevice, AkgCpuKernelBuilder);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_CPU_AKG_CPU_KERNEL_BUILD_H_

View File

@ -20,13 +20,13 @@
#include <functional>
#include <memory>
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
#include "plugin/device/cpu/hal/hardware/ms_collective_comm_lib.h"
#endif
namespace mindspore {
namespace kernel {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
using device::CollectiveOpReduceType::Reduce_Sum;
using device::cpu::kMCCLGlobalGroupName;
using device::cpu::MsCollectiveCommLib;
@ -37,7 +37,7 @@ constexpr char kSupportedReduceOp[] = "sum";
} // namespace
void AllReduceCPUKernelMod::InitKernel(const CNodePtr &kernel_node) {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
@ -67,7 +67,7 @@ std::vector<KernelAttr> AllReduceCPUKernelMod::GetOpSupport() {
bool AllReduceCPUKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << kernel_name_ << " has at least one input and one output, but got 0.";
}

View File

@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#include <string>
#include "kernel/akg/akg_kernel_build.h"
#include "kernel/akg/akg_kernel_build_manager.h"
#include "base/base.h"
namespace mindspore {
@ -33,6 +34,7 @@ class AkgGpuKernelBuilder : public AkgKernelBuilder {
void AkgSaveJsonInfo(const string &kernel_name, const string &kernel_json) override;
};
REG_AKG_KERNEL_BUILDER(kGPUDevice, AkgGpuKernelBuilder);
} // namespace kernel
} // namespace mindspore

View File

@ -38,7 +38,7 @@
#include "include/common/utils/parallel_context.h"
#include "include/common/debug/env_config_parser.h"
#include "plugin/device/ascend/hal/device/ascend_device_address.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "kernel/common_utils.h"
@ -600,7 +600,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
}
add_need_alloc_nodes(input_node);
}
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
bool ps_cache_check = false;
#endif
std::map<AnfNodePtr, AnfNodePtr> shadow_backend_node_map;
@ -616,7 +616,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
continue;
}
DeviceAddressPtr device_address = GetInternalDeviceAddress(graph, item);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && !defined(__APPLE__))
#ifdef WITH_BACKEND
const std::string &param_name = item->fullname_with_scope();
if (ps::ps_cache_instance.IsHashTable(param_name)) {
MS_LOG(INFO) << "Parameter(" << param_name << ")"
@ -1833,7 +1833,7 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
MS_LOG(INFO) << "Clear graph:" << graph_id << " runtime resource";
}
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
namespace {
// Finalize ps cache module before throw an exception.
void FinalizePsCache(const std::string &exception) {

View File

@ -193,7 +193,7 @@ class KernelRuntime {
void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph);
void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx);
DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const;
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index,
size_t *const first_cache_size);
void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph);

View File

@ -16,7 +16,7 @@
#include "runtime/device/kernel_runtime_manager.h"
#include "utils/log_adapter.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
#include "ps/ps_cache/ps_cache_manager.h"
#endif
#include "backend/common/session/pynative_task_manager.h"
@ -26,7 +26,7 @@ namespace device {
void KernelRuntimeManager::ClearRuntimeResource() {
// Just remove PyNative tasks before runtime resource release.
session::PynativeTaskManager::GetInstance().Reset();
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.SyncEmbeddingTable();
}
@ -132,7 +132,7 @@ void KernelRuntimeManager::ReleaseKernelRuntime(const std::string &device_name,
if (runtime == nullptr) {
return;
}
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#ifdef WITH_BACKEND
if (ps::PSContext::instance()->is_worker() && ps::PsDataPrefetch::GetInstance().cache_enable()) {
ps::ps_cache_instance.SyncEmbeddingTable();
}

View File

@ -17,7 +17,7 @@
#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_SET_H_
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined __APPLE__))
#ifdef WITH_BACKEND
#define ENABLE_RPC_ACTOR
#endif

View File

@ -172,7 +172,7 @@ void IntHandler(int, siginfo_t *, void *) {
}
#endif
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
#ifdef WITH_BACKEND
bool SendFinishTransform(const std::string &actor_set_name) {
auto node = ClusterContext::instance()->node();
MS_EXCEPTION_IF_NULL(node);
@ -502,7 +502,7 @@ ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info
Optimize(actor_set);
MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor end.";
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
#ifdef WITH_BACKEND
if (ClusterContext::instance()->initialized() && RecoveryContext::GetInstance()->enable_recovery()) {
while (!SendFinishTransform(graph_compiler_info.name_)) {
MS_LOG(WARNING) << "Send finish transform graph failed.";
@ -599,7 +599,7 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
const size_t kSecondsToMilliseconds = 1000;
SetActorExecutionStrategy(actor_set, strategy, (end_time - start_time) * kSecondsToMilliseconds);
#if ((defined ENABLE_CPU) && (!defined _WIN32) && (!defined _WIN64) && !defined(__APPLE__))
#ifdef WITH_BACKEND
DoDisasterRecovery(actor_set->name_);
#endif
}