forked from mindspore-Ecosystem/mindspore

commit 0683b7fd75 ("clean codex")
parent 0f065c87e2
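The pattern repeated throughout this commit: runtime and session APIs that took a session::KernelGraph pointer (and had to null-check it defensively) now take const session::KernelGraph &, and call sites pass *kernel_graph instead of kernel_graph.get(). A minimal self-contained sketch of the before/after shape, using a hypothetical stand-in type rather than MindSpore's real KernelGraph:

#include <cstdint>
#include <iostream>
#include <memory>

// Hypothetical stand-in for session::KernelGraph.
struct KernelGraph {
  uint32_t graph_id() const { return 42; }
};

// Before: pointer parameter; the caller may pass null, so the callee must check.
bool RunOld(const KernelGraph *graph) {
  if (graph == nullptr) return false;  // defensive check, repeated in every callee
  std::cout << "run graph " << graph->graph_id() << "\n";
  return true;
}

// After: a const reference is non-null by construction; the check disappears.
bool RunNew(const KernelGraph &graph) {
  std::cout << "run graph " << graph.graph_id() << "\n";
  return true;
}

int main() {
  auto kernel_graph = std::make_shared<KernelGraph>();
  (void)RunOld(kernel_graph.get());  // old call-site shape
  (void)RunNew(*kernel_graph);       // new call-site shape: dereference the shared_ptr
  return 0;
}

The reference form shifts the non-null obligation to the caller's dereference, which is why the diff below can drop so many MS_EXCEPTION_IF_NULL(graph) lines without changing behavior.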
@@ -812,8 +812,7 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no
   return format;
 }
 
-void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list,
-                      const AnfNodePtr &anf_node) {
+void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
   for (size_t i = 0; i < input_json.size(); i++) {
     for (size_t m = 0; m < input_json[i].size(); m++) {
       size_t size_i = 1;
@@ -840,8 +839,7 @@ void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *inp
   }
 }
 
-void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list,
-                       const AnfNodePtr &anf_node) {
+void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
   for (size_t i = 0; i < output_json.size(); i++) {
     for (size_t m = 0; m < output_json[i].size(); m++) {
       size_t size_i = 1;
@@ -878,8 +876,8 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
   }
   input_size_list->clear();
   output_size_list->clear();
-  GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list, anf_node);
-  GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list, anf_node);
+  GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list);
+  GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list);
   return true;
 }
 
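These first hunks also drop a parameter (anf_node) that the size-list helpers never read, and trim every caller to match. A hedged sketch of the same cleanup on hypothetical names:

#include <cstddef>
#include <vector>

// Before: 'tag' was threaded through every call but never used by the callee.
size_t SumSizesOld(const std::vector<size_t> &sizes, int /*tag*/) {
  size_t total = 0;
  for (size_t s : sizes) total += s;
  return total;
}

// After: the dead parameter is removed and call sites shrink accordingly,
// mirroring GetInputSizeList/GetOutputSizeList losing anf_node above.
size_t SumSizes(const std::vector<size_t> &sizes) {
  size_t total = 0;
  for (size_t s : sizes) total += s;
  return total;
}

The same unused-parameter cleanup recurs later for LaunchFunc (tensors_mask) and UpdateShapeAndType (data_type).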
@@ -733,8 +733,8 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
   if (!enable_mem_scheduler) {
     auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
     MS_EXCEPTION_IF_NULL(runtime_instance);
-    runtime_instance->AssignStaticMemoryInput(child_graph.get());
-    runtime_instance->AssignStaticMemoryValueNode(child_graph.get());
+    runtime_instance->AssignStaticMemoryInput(*child_graph);
+    runtime_instance->AssignStaticMemoryValueNode(*child_graph);
   }
 }
 
@@ -822,7 +822,7 @@ void AscendSession::BindAddressToTensor(
   }
 }
 
-void AscendSession::LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
+void AscendSession::LaunchFunc(const KernelGraphPtr &graph,
                                const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node,
                                bool is_dynamic_shape, const std::vector<tensor::TensorPtr> &input_tensors) {
   // Wait for AllReduce
@@ -887,7 +887,7 @@ void AscendSession::PrepareForOutputTensor(const KernelGraphPtr &graph,
   // Create DeviceAddress For Output Tensor(contain: Shape, Format, DType)
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
   runtime_instance->RunOpMallocPre(*graph, input_tensors);
-  runtime_instance->UpdateRefNodeOutputMem(graph.get());
+  runtime_instance->UpdateRefNodeOutputMem(*graph);
   // CREATE OUTPUT TENSOR ADDRESS
   UpdateOutputs(graph, outputs, input_tensors, tensor_to_node);
 }
@@ -951,7 +951,7 @@ void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_inf
   auto &task_manager = PynativeTaskManager::GetInstance();
   if (!cache_miss && task_manager.QueueEmpty()) {
     // Cache match and there are no task in Queue. Just Launch immediately.
-    LaunchFunc(graph, tensors_mask, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
+    LaunchFunc(graph, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
   } else {
     auto run_op_context = std::make_shared<RunOpContext>(graph_info, op_run_info->is_dynamic_shape, graph, tensors_mask,
                                                          *input_tensors, tensor_to_node);
@@ -1320,7 +1320,7 @@ void AscendSession::BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kerne
   }
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  if (!runtime_instance->GenDynamicKernel(kernel_graph.get())) {
+  if (!runtime_instance->GenDynamicKernel(*kernel_graph)) {
     MS_LOG(DEBUG) << "Graph:" << kernel_graph->graph_id() << " failed to generate dynamic kernel!";
   }
   MS_LOG(DEBUG) << "Finish!";
@@ -1460,7 +1460,7 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
   InitMemReuseExecOrder(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->AssignMemory(kernel_graph);
+  runtime_instance->AssignMemory(*kernel_graph);
   MS_LOG(INFO) << "Finish!";
 }
 
@@ -1469,7 +1469,7 @@ void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
 }
 
 void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &input_tensors,
@@ -1478,21 +1478,21 @@ void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &in
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph, tensor_to_node);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph, tensor_to_node);
 }
 
 void AscendSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
   MS_EXCEPTION_IF_NULL(graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->GenKernelEvents(graph);
+  runtime_instance->GenKernelEvents(*graph);
 }
 
 void AscendSession::RunOpMemoryClear(const KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpClearMemory(kernel_graph);
+  runtime_instance->RunOpClearMemory(*kernel_graph);
 }
 
 void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const {
@@ -1503,7 +1503,7 @@ void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const
   (void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  bool ret_ok = runtime_instance->Load(kernel_graph.get(), is_task_sink);
+  bool ret_ok = runtime_instance->Load(*kernel_graph, is_task_sink);
   if (!ret_ok) {
     MS_LOG(EXCEPTION) << "Load task error!";
   }
@@ -1525,7 +1525,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
     DumpSetup(kernel_graph);
 #endif
   }
-  bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink);
+  bool ret_ok = runtime_instance->Run(*kernel_graph, is_task_sink);
   if (is_task && is_task_sink) {
 #ifndef ENABLE_SECURITY
     Dump(kernel_graph);
@@ -1599,7 +1599,7 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  (void)runtime_instance->LoadData(kernel_graph.get());
+  (void)runtime_instance->LoadData(*kernel_graph);
   MS_LOG(INFO) << "Finish!";
 }
 
@@ -1884,8 +1884,8 @@ void AscendSession::AssignStaticMemory(NotNull<KernelGraphPtr> graph,
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
   runtime_instance->ClearGlobalIdleMem();
-  runtime_instance->AssignStaticMemoryInput(graph.get().get());
-  runtime_instance->AssignStaticMemoryValueNode(graph.get().get());
+  runtime_instance->AssignStaticMemoryInput(*graph.get());
+  runtime_instance->AssignStaticMemoryValueNode(*graph.get());
   for (auto &child_graph : graph->child_graph_order()) {
     AssignStaticMemory(NOT_NULL(child_graph.lock()), memo);
   }
@@ -1977,8 +1977,7 @@ void AscendSession::ExecuteAllTaskInQueue() {
   while (!launch_tasks.empty()) {
     auto &launch_task = launch_tasks.front();
     const auto &context = launch_task->context();
-    LaunchFunc(context->graph(), context->tensor_mask(), context->tensor_to_node(), context->is_dynamic_shape(),
-               context->input_tensors());
+    LaunchFunc(context->graph(), context->tensor_to_node(), context->is_dynamic_shape(), context->input_tensors());
    launch_tasks.pop();
   }
 
@@ -153,7 +153,7 @@ class AscendSession : public SessionBasic {
                           VectorRef *outputs) const;
   std::shared_ptr<device::Bucket> CreateBucket(uint32_t bucket_id, uint32_t bucket_size) override;
 
-  void LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
+  void LaunchFunc(const KernelGraphPtr &graph,
                   const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node, bool is_dynamic_shape,
                   const std::vector<tensor::TensorPtr> &input_tensors);
   KernelGraphPtr CreateKernelGraph(const GraphInfo &graph_info, OpRunInfo *op_run_info,
 
@@ -209,7 +209,7 @@ void CPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
 }
 
 void CPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {
-  bool ret = runtime_.Run(kernel_graph.get(), false);
+  bool ret = runtime_.Run(*kernel_graph, false);
   if (!ret) {
     MS_LOG(EXCEPTION) << "Run graph failed";
   }
@@ -291,7 +291,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
   runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node);
   runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs);
 
-  bool ret = runtime_.Run(kernel_graph.get(), false);
+  bool ret = runtime_.Run(*kernel_graph, false);
   if (!ret) {
     MS_LOG(EXCEPTION) << "Run Op failed";
   }
@@ -301,7 +301,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
     UpdateOutputAbstract(kernel_graph, op_run_info);
   }
   SetOutputFlags(*outputs);
-  runtime_.RunOpClearMemory(kernel_graph.get());
+  runtime_.RunOpClearMemory(*kernel_graph);
 }
 
 void CPUSession::SetKernelInfo(const KernelGraph *kernel_graph) {
 
@@ -248,7 +248,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->AssignMemory(kernel_graph);
+  runtime_instance->AssignMemory(*kernel_graph);
 }
 
 void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors,
@@ -256,21 +256,21 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
+  runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
 }
 
 void GPUSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
   MS_EXCEPTION_IF_NULL(graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->GenKernelEvents(graph);
+  runtime_instance->GenKernelEvents(*graph);
 }
 
 void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->RunOpClearMemory(kernel_graph);
+  runtime_instance->RunOpClearMemory(*kernel_graph);
 }
 
 namespace {
@@ -626,7 +626,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
 void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-  if (!runtime_instance->Run(kernel_graph.get(), false)) {
+  if (!runtime_instance->Run(*kernel_graph, false)) {
     MS_LOG(EXCEPTION) << "GPU execute graph failed!";
   }
 }
 
@@ -99,7 +99,7 @@ void DumpJsonParser::Parse() {
   std::ifstream json_file(dump_config_file.value());
   if (!json_file.is_open()) {
     MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."
-                      << " Errno:" << errno << " ErrInfo:" << strerror(errno);
+                      << " Errno:" << errno;
   }
 
   nlohmann::json j;
@@ -586,13 +586,13 @@ bool DumpJsonParser::OutputNeedDump() const {
   return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
 }
 
-void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
+void DumpJsonParser::UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph) {
   if (!async_dump_enabled_) {
     return;
   }
   MS_LOG(INFO) << "Update async dump kernel list for hccl";
   std::map<std::string, uint32_t> update_kernels;
-  for (const auto &kernel : kernel_graph->execution_order()) {
+  for (const auto &kernel : kernel_graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(kernel);
     if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
         DumpJsonParser::GetInstance().NeedDump(GetKernelNodeName(kernel))) {
 
@@ -61,7 +61,7 @@ class DumpJsonParser {
   bool InputNeedDump() const;
   bool OutputNeedDump() const;
   std::string GetOpOverflowBinPath(uint32_t graph_id) const;
-  void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph);
+  void UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph);
 
   void ClearGraph() { graphs_.clear(); }
   void SaveGraph(session::KernelGraph *graph) { (void)graphs_.emplace_back(graph); }
 
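UpdateNeedDumpKernels (and, later, DistributeDebugTask) trades the NotNull<const session::KernelGraph *> wrapper for a plain const reference: both encode "never null", but the reference needs no wrapper type or NOT_NULL() call at the call site. A minimal sketch follows; the NotNull below is a simplified stand-in, not MindSpore's actual template:

#include <cstdint>
#include <iostream>

struct KernelGraph {
  uint32_t graph_id() const { return 7; }
};

// Simplified stand-in for a NotNull<T> pointer wrapper.
template <typename T>
class NotNull {
 public:
  explicit NotNull(T ptr) : ptr_(ptr) {}
  T operator->() const { return ptr_; }
 private:
  T ptr_;
};

// Before: the non-null guarantee lives in a wrapper the caller constructs.
void DumpOld(NotNull<const KernelGraph *> graph) { std::cout << graph->graph_id() << "\n"; }

// After: a const reference cannot be null, so the wrapper is redundant.
void DumpNew(const KernelGraph &graph) { std::cout << graph.graph_id() << "\n"; }

int main() {
  KernelGraph g;
  DumpOld(NotNull<const KernelGraph *>(&g));
  DumpNew(g);
  return 0;
}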
@@ -19,6 +19,7 @@
 #include <memory>
+#include <utility>
 #include <algorithm>
 #include <set>
 #include "utils/signal_util.h"
 #include "runtime/device/ascend/ascend_device_address.h"
 #include "runtime/device/ascend/distribute/ascend_collective.h"
 
@@ -372,8 +373,7 @@ bool AscendKernelRuntime::Init() {
   return true;
 }
 
-bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool AscendKernelRuntime::LoadData(const session::KernelGraph &graph) {
 #ifdef ENABLE_DEBUGGER
   MS_LOG(INFO) << "Start load step";
   for (const auto &graph_ptr : debugger_->GetGraphPtrList()) {
@@ -412,7 +412,7 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size
   return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id, node_index);
 }
 
-bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
+bool AscendKernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) {
   if (!is_task_sink) {
     MS_LOG(INFO) << "Graph mode with not task sink";
     GenKernelEvents(graph);
@@ -428,10 +428,9 @@ bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
   return true;
 }
 
-bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph &graph) {
   MS_LOG(INFO) << "GenDynamicKernel start";
-  auto cnode_list = graph->execution_order();
+  auto cnode_list = graph.execution_order();
   std::vector<DynamicKernelPtr> dynamic_kernels;
   for (const auto &cnode : cnode_list) {
     MS_EXCEPTION_IF_NULL(cnode);
@@ -445,15 +444,14 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
     dynamic_kernel->Initialize();
     dynamic_kernels.emplace_back(dynamic_kernel);
   }
-  graph_dynamic_kernel_map_[graph->graph_id()] = std::move(dynamic_kernels);
+  graph_dynamic_kernel_map_[graph.graph_id()] = std::move(dynamic_kernels);
   MS_LOG(INFO) << "GenDynamicKernel end";
   return true;
 }
 
-bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool AscendKernelRuntime::GenTask(const session::KernelGraph &graph) {
   SetCurrentContext();
-  if (graph->is_dynamic_shape()) {
+  if (graph.is_dynamic_shape()) {
     if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE && (ConfigManager::GetInstance().iter_num() > 1)) {
       MS_LOG(EXCEPTION) << "Dynamic shape is not supported with dataset_sink_mode.";
     }
@@ -465,9 +463,9 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
     MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel";
     return GenDynamicKernel(graph);
   }
-  MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id();
+  MS_LOG(INFO) << "GenTask start. GraphId:" << graph.graph_id();
 #ifndef ENABLE_SECURITY
-  DumpJsonParser::GetInstance().UpdateNeedDumpKernels(NOT_NULL(graph));
+  DumpJsonParser::GetInstance().UpdateNeedDumpKernels(graph);
 #endif
 #ifdef MEM_REUSE_DEBUG
   if (!EnvConfigParser::GetInstance().GetSysMemreuse()) {
@@ -476,19 +474,19 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
   }
 #endif
   vector<std::shared_ptr<TaskInfo>> task_info_list;
-  auto anf_node_list = graph->execution_order();
+  auto anf_node_list = graph.execution_order();
   auto task_generator = TaskGenerator();
-  if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph->graph_id())) {
+  if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph.graph_id())) {
     return false;
   }
   // Store the task_info_list
-  auto insert_ret = task_map_.insert(std::make_pair(graph->graph_id(), task_info_list));
+  auto insert_ret = task_map_.insert(std::make_pair(graph.graph_id(), task_info_list));
   if (!insert_ret.second) {
     MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session.";
   }
   // Graph may have no compute node, such TensorAddGrad.
   if (task_info_list.empty()) {
-    MS_LOG(WARNING) << "Graph " << graph->graph_id() << " have no compute node";
+    MS_LOG(WARNING) << "Graph " << graph.graph_id() << " have no compute node";
     return true;
   }
   AscendStreamAssign &assign_instance = AscendStreamAssign::GetInstance();
@@ -500,13 +498,13 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
   assign_instance.GetHcomStreams(&force_copy_stream_list);
   MS_LOG(INFO) << "Call DavinciModel total stream num:" << resource_manager.get_cur_stream_num()
                << ", total event num:" << resource_manager.get_cur_event_num()
-               << ", total label num:" << graph->label_num()
+               << ", total label num:" << graph.label_num()
                << ", wait_active_stream_list size:" << wait_active_stream_list.size()
                << ", force_copy_stream_list size:" << force_copy_stream_list.size();
   auto model = std::make_shared<ge::model_runner::DavinciModel>(
     task_info_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0,
-    resource_manager.get_cur_stream_num(), graph->label_num(), resource_manager.get_cur_event_num(), 0);
-  auto ret = graph_model_map_.insert(std::make_pair(graph->graph_id(), model));
+    resource_manager.get_cur_stream_num(), graph.label_num(), resource_manager.get_cur_event_num(), 0);
+  auto ret = graph_model_map_.insert(std::make_pair(graph.graph_id(), model));
   if (!ret.second) {
     MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session.";
   }
@@ -514,23 +512,22 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
   return true;
 }
 
-bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool AscendKernelRuntime::LoadTask(const session::KernelGraph &graph) {
   SetCurrentContext();
-  if (graph->is_dynamic_shape()) {
+  if (graph.is_dynamic_shape()) {
     MS_LOG(INFO) << "Dynamic Shape Graph Skip Load Task Step";
     return true;
   }
 
-  MS_LOG(INFO) << "LoadTask start. GraphId:" << graph->graph_id();
+  MS_LOG(INFO) << "LoadTask start. GraphId:" << graph.graph_id();
   if (GraphWithEmptyTaskList(graph)) {
     MS_LOG(WARNING) << "LoadTask end, task list is empty";
     return true;
   }
 
-  auto model_iter = graph_model_map_.find(graph->graph_id());
+  auto model_iter = graph_model_map_.find(graph.graph_id());
   if (model_iter == graph_model_map_.end()) {
-    MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph LoadTask without GenTask.";
+    MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! Graph LoadTask without GenTask.";
     return false;
   }
 
@@ -540,7 +537,7 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
 #ifndef ENABLE_SECURITY
   std::function<void *()> model_handle =
     std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
-  DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
+  DistributeDebugTask(graph, NOT_NULL(model_handle));
 #endif
 
   try {
@@ -556,9 +553,9 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
   if (ProfilingManager::GetInstance().IsProfiling()) {
     auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
     auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
-    ProfilingUtils::ReportProfilingData(task_ids, stream_ids, *graph);
+    ProfilingUtils::ReportProfilingData(task_ids, stream_ids, graph);
   }
-  LaunchDataDump(graph->graph_id());
+  LaunchDataDump(graph.graph_id());
 #endif
 
   ModelRunner::Instance().LoadModelComplete(model_iter->first);
@@ -566,18 +563,18 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
 }
 
 #ifndef ENABLE_SECURITY
-void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
+void AscendKernelRuntime::DistributeDebugTask(const session::KernelGraph &graph,
                                               const NotNull<std::function<void *()>> &model_handle) {
   if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
     return;
   }
   MS_LOG(INFO) << "Start Distribute Debug Task";
-  auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
+  auto data_dumper = std::make_shared<DataDumper>(&graph, model_handle);
   MS_EXCEPTION_IF_NULL(data_dumper);
-  auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
+  auto ret = graph_data_dumper_.try_emplace(graph.graph_id(), data_dumper);
   data_dumper->OpDebugRegister();
   if (!ret.second) {
-    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
+    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph.graph_id() << " data dumper failed";
   }
 }
 
@@ -671,8 +668,7 @@ std::string AscendKernelRuntime::GetDumpPath() {
 }
 
 #ifndef ENABLE_SECURITY
-void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph &graph) {
   const std::string path = GetDumpPath();
   if (access(path.c_str(), F_OK) == 0) {
     if (!DeleteDumpDir(path)) {
@@ -697,10 +693,9 @@ void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *grap
 }
 #endif
 
-bool AscendKernelRuntime::Run(session::KernelGraph *const graph, bool is_task_sink) {
+bool AscendKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
   const uint64_t kUSecondInSecond = 1000000;
   SignalGuard sg(IntHandler);
-  MS_EXCEPTION_IF_NULL(graph);
   bool ret = false;
 
   if (is_task_sink) {
@@ -784,10 +779,9 @@ void AscendKernelRuntime::SetKernelModStream(const std::vector<CNodePtr> &kernel
                  [](const std::pair<void *, size_t> &item) { return item.second; });
 }
 
-void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto &kernels = graph->execution_order();
-  if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) {
+void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph &graph) {
+  auto &kernels = graph.execution_order();
+  if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) {
     return;
   }
   std::vector<size_t> last_stream_nodes;
@@ -840,7 +834,7 @@ void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
     }
   }
   ProcessBoundaryEvent(kernels, &kernel_post_run_events, last_stream_nodes);
-  graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events);
+  graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events);
 }
 
 void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
@@ -882,12 +876,11 @@ void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kern
   }
 }
 
-bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph->graph_id();
-  auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
+bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph &graph) {
+  MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph.graph_id();
+  auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
   if (iter == graph_dynamic_kernel_map_.end()) {
-    MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Not Found! Please generator executor first";
+    MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Not Found! Please generator executor first";
     return false;
   }
 
@@ -919,16 +912,15 @@ bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *grap
   return true;
 }
 
-bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
-  current_graph_ = graph;
+bool AscendKernelRuntime::RunTask(const session::KernelGraph &graph) {
+  current_graph_ = &graph;
   SetCurrentContext();
-  MS_EXCEPTION_IF_NULL(graph);
-  if (graph->is_dynamic_shape()) {
+  if (graph.is_dynamic_shape()) {
     MS_LOG(INFO) << "Dynamic Shape Graph Run Task Async";
     return RunDynamicKernelAsync(graph);
   }
 
-  MS_LOG(INFO) << "RunTask start. GraphId:" << graph->graph_id();
+  MS_LOG(INFO) << "RunTask start. GraphId:" << graph.graph_id();
 
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -937,13 +929,13 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
     return true;
   }
 
-  if (!CheckGraphIdValid(graph->graph_id())) {
-    MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph RunTask without GenTask.";
+  if (!CheckGraphIdValid(graph.graph_id())) {
+    MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! Graph RunTask without GenTask.";
     return false;
   }
 
   try {
-    ModelRunner::Instance().RunModel(graph->graph_id());
+    ModelRunner::Instance().RunModel(graph.graph_id());
   } catch (const std::exception &) {
 #ifndef ENABLE_SECURITY
     DumpTaskExceptionInfo(graph);
@@ -1139,9 +1131,8 @@ bool AscendKernelRuntime::DestroyHccl() {
   return true;
 }
 
-bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph *graph) const {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto iter = task_map_.find(graph->graph_id());
+bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph &graph) const {
+  auto iter = task_map_.find(graph.graph_id());
   if (iter == task_map_.end()) {
     MS_LOG(EXCEPTION) << "Unknown graph ptr";
   }
 
@@ -41,19 +41,19 @@ class AscendKernelRuntime : public KernelRuntime {
   AscendKernelRuntime() = default;
   ~AscendKernelRuntime() override;
   bool Init() override;
-  bool LoadData(session::KernelGraph *graph) override;
-  bool GenTask(const session::KernelGraph *graph);
-  void GenKernelEvents(const session::KernelGraph *graph) override;
+  bool LoadData(const session::KernelGraph &graph) override;
+  bool GenTask(const session::KernelGraph &graph);
+  void GenKernelEvents(const session::KernelGraph &graph) override;
   void SetKernelModStream(const std::vector<CNodePtr> &kernels, std::vector<size_t> *last_stream_nodes);
   void ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
                             std::vector<std::vector<std::function<void()>>> *kernel_run_events,
                             const std::vector<size_t> &last_stream_nodes);
-  bool GenDynamicKernel(const session::KernelGraph *graph) override;
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
-  bool LoadTask(const session::KernelGraph *graph);
-  bool RunTask(const session::KernelGraph *graph);
-  bool Load(session::KernelGraph *graph, bool is_task_sink) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
+  bool GenDynamicKernel(const session::KernelGraph &graph) override;
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override;
+  bool LoadTask(const session::KernelGraph &graph);
+  bool RunTask(const session::KernelGraph &graph);
+  bool Load(const session::KernelGraph &graph, bool is_task_sink) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
   void ClearGraphRuntimeResource(uint32_t graph_id) override;
   void ClearGlobalIdleMem() override;
   bool SyncStream() override;
@@ -91,18 +91,17 @@ class AscendKernelRuntime : public KernelRuntime {
 
   void ClearGraphModelMap();
   void ReleaseDeviceRes() override;
-  bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
+  bool GraphWithEmptyTaskList(const session::KernelGraph &graph) const;
   bool CheckGraphIdValid(GraphId graph_id) const;
 #ifndef ENABLE_SECURITY
-  void DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
-                           const NotNull<std::function<void *()>> &model_handle);
+  void DistributeDebugTask(const session::KernelGraph &graph, const NotNull<std::function<void *()>> &model_handle);
   void LaunchDataDump(GraphId graph_id);
   void ReportProfilingData();
 #endif
   static CNodePtr GetErrorNodeName(uint32_t streamid, uint32_t taskid);
   static std::string GetDumpPath();
 #ifndef ENABLE_SECURITY
-  static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
+  static void DumpTaskExceptionInfo(const session::KernelGraph &graph);
 #endif
   static void TaskFailCallback(rtExceptionInfo *task_fail_info);
   static bool DeleteDumpDir(const std::string &path);
 
@@ -197,11 +197,11 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
   }
 }
 
-void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
+void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
   MemoryManager::MallocSomasDynamicMem(graph);
 #ifndef ENABLE_SECURITY
   if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable()) {
-    somas_reuse_util_ptr_->ConvertToProfilingNode(graph->graph_id());
+    somas_reuse_util_ptr_->ConvertToProfilingNode(graph.graph_id());
   }
 #endif
 }
 
@@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager {
   void *MallocMemFromMemPool(size_t size) override;
   void FreeMemFromMemPool(void *device_ptr) override;
   uint64_t GetDeviceMemSize();
-  void MallocSomasDynamicMem(const session::KernelGraph *graph) override;
+  void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
   uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
   std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
     return AscendMemoryPool::GetInstance().AllocContinuousTensorMem(total_size, size_list);
 
@@ -142,10 +142,9 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No
   }
 
   auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
-  auto data_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
   std::vector<int64_t> tmp_shape;
   std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
-  return UpdateShapeAndType(tmp_shape, data_type, NOT_NULL(input_shape_and_type_[input_index]));
+  return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index]));
 }
 
 bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node) {
@@ -171,8 +170,7 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const
 
   std::vector<int64_t> tmp_shape;
   std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
-  return UpdateShapeAndType(tmp_shape, AnfAlgo::GetOutputDeviceDataType(anf_node, output_index),
-                            NOT_NULL(output_shape_and_type_[output_index]));
+  return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index]));
 }
 
 bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
@@ -182,7 +180,7 @@ bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<s
   return true;
 }
 
-bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
+bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape,
                                              NotNull<AicpuShapeAndType *> shape_and_type) {
   if (shape.empty() || shape.size() > kernel::kMaxShapeDims) {
     MS_LOG(ERROR) << "Invalid shape:" << shape.size();
 
@@ -65,8 +65,7 @@ class AicpuExtInfoHandler {
   bool ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
   bool ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
 
-  static bool UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
-                                 NotNull<AicpuShapeAndType *> shape_and_type);
+  static bool UpdateShapeAndType(const std::vector<int64_t> &shape, NotNull<AicpuShapeAndType *> shape_and_type);
 
   static void GetShapeAndType(NotNull<const AicpuShapeAndType *> shape_and_type, NotNull<std::vector<int64_t> *> shape,
                               NotNull<TypeId *> data_type);
 
@@ -227,7 +227,7 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t len) {
   return RT_ERROR_NONE;
 }
 
-bool ProfilingManager::StopProfiling() {
+bool ProfilingManager::StopProfiling() const {
   MS_LOG(INFO) << "StopProfiling";
   if (!IsProfiling()) {
     MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode.";
 
@@ -49,7 +49,7 @@ class ProfilingManager {
   bool ReportProfilingData(const map<uint32_t, string> &op_taskId_map) const;
   bool ProfRegisterCtrlCallback() const;
   bool StartupProfiling(uint32_t device_id);
-  bool StopProfiling();
+  bool StopProfiling() const;
 
   inline bool IsProfiling() const {
     auto profiler_manager = profiler::ProfilerManager::GetInstance();
 
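StopProfiling gains a const qualifier because it does not mutate the manager's state; const-qualifying it lets const references and pointers to the manager call it. A short sketch with a hypothetical manager class:

#include <iostream>

class ProfilingManager {
 public:
  // const-qualified: promises not to modify the object, so even a
  // const ProfilingManager may call it.
  bool StopProfiling() const {
    std::cout << "StopProfiling\n";
    return true;
  }
};

int main() {
  const ProfilingManager manager;
  return manager.StopProfiling() ? 0 : 1;
}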
@@ -210,7 +210,7 @@ void ProfilingUtils::GetTraceBpEnd(const session::KernelGraph &kernel_graph, con
   if (bp_end_str.empty()) {
     trace_info->trace_bp_end = trace_info->trace_iter_end;
   } else {
-    trace_info->trace_bp_end.insert(bp_end_str);
+    (void)trace_info->trace_bp_end.insert(bp_end_str);
   }
 }
 
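The (void) cast added to trace_bp_end.insert(...) explicitly discards the pair<iterator, bool> that std::set::insert returns, documenting the intent and silencing discarded-result warnings. For instance:

#include <set>
#include <string>

int main() {
  std::set<std::string> trace_bp_end;
  // insert returns std::pair<iterator, bool>; the cast records that the
  // result is intentionally ignored.
  (void)trace_bp_end.insert("bp_end_node");
  return 0;
}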
@@ -72,7 +72,8 @@ void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
   if (is_enable_mem_reuse) {
     MS_EXCEPTION_IF_NULL(mem_manager_);
     mem_manager_->ResetDynamicMemory();
-    AssignDynamicMemory(kernel_graph);
+    MS_EXCEPTION_IF_NULL(kernel_graph);
+    AssignDynamicMemory(*kernel_graph);
 #ifdef MEM_REUSE_DEBUG
     // Get normal graph ir for memreuse
     mindspore::memreuse::MemReuseChecker::GetInstance().CheckNormalIR(kernel_graph);
@@ -405,16 +406,15 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
   static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseSummaryRefCount(summary_outputs);
 }
 
-bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);
+bool CPUKernelRuntime::Run(const session::KernelGraph &kernel_graph, bool) {
+  static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(&kernel_graph);
 
-  auto kernels = kernel_graph->execution_order();
+  auto kernels = kernel_graph.execution_order();
 
 #ifndef ENABLE_SECURITY
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
-  uint32_t graph_id = kernel_graph->graph_id();
+  uint32_t graph_id = kernel_graph.graph_id();
 #endif
 #ifdef ENABLE_DUMP_IR
   std::string name = "mem_address_list";
@@ -490,7 +490,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
   }
 #ifndef ENABLE_SECURITY
   if (iter_dump_flag) {
-    CPUE2eDump::DumpParametersAndConst(kernel_graph, graph_id);
+    CPUE2eDump::DumpParametersAndConst(&kernel_graph, graph_id);
   }
   if (graph_id == 0) {
     dump_json_parser.UpdateDumpIter();
 
@@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
   ~CPUKernelRuntime() override = default;
 
   bool Init();
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
   void AssignKernelAddress(session::KernelGraph *kernel_graph);
   void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
                            VectorRef *outputs, std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
@@ -44,8 +44,8 @@ class CPUKernelRuntime : public KernelRuntime {
                            VectorRef *outputs);
   void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
   void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
-  bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
+  bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
   DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kCPU; };
 
  protected:
 
@@ -431,7 +431,7 @@ void GPUKernelRuntime::FetchMemUnitSize(const session::KernelGraph *graph) {
   }
 }
 
-void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
+void GPUKernelRuntime::AssignMemory(const session::KernelGraph &graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   MS_EXCEPTION_IF_NULL(mem_manager_);
@@ -441,18 +441,17 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
   bool is_enable_dynamic_mem = context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL);
   if (is_enable_dynamic_mem) {
     // Use the dynamic memory pool.
-    InitKernelRefCount(graph);
-    InitMemorySwapInfo(graph);
-    InitKernelOutputAddress(graph);
-    InitKernelWorkspaceAddress(graph);
-    SaveGraphOutputNode(graph);
+    InitKernelRefCount(&graph);
+    InitMemorySwapInfo(&graph);
+    InitKernelOutputAddress(&graph);
+    InitKernelWorkspaceAddress(&graph);
+    SaveGraphOutputNode(&graph);
   } else {
     AssignDynamicMemory(graph);
   }
 }
 
-bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool GPUKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
   struct timeval start_time, end_time;
   (void)gettimeofday(&start_time, nullptr);
   bool ret = true;
@@ -462,7 +461,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
   bool is_enable_pynative_infer = context_ptr->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER);
   bool is_pynative_mode = (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
   if (is_enable_dynamic_mem && !is_pynative_mode && !is_enable_pynative_infer) {
-    auto graph_id = graph->graph_id();
+    auto graph_id = graph.graph_id();
     auto iter = mem_swap_map_.find(graph_id);
     if (iter == mem_swap_map_.end()) {
       MS_LOG(EXCEPTION) << "Find memory swap map failed.";
@@ -476,11 +475,11 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
     mem_reuse_util_ = mem_reuse_iter->second;
     MS_EXCEPTION_IF_NULL(mem_reuse_util_);
 
-    ret = RunOneStep(graph);
+    ret = RunOneStep(&graph);
   } else {
-    if (graph->is_dynamic_shape()) {
+    if (graph.is_dynamic_shape()) {
       // run dynamic shape graph in pynative
-      ret = RunOpLaunchKernelDynamic(graph);
+      ret = RunOpLaunchKernelDynamic(&graph);
     } else {
       ret = LaunchKernels(graph);
     }
 
@@ -43,10 +43,10 @@ class GPUKernelRuntime : public KernelRuntime {
   bool Init() override;
   void ReleaseDeviceRes() override;
   void ClearGraphRuntimeResource(uint32_t graph_id) override;
-  void AssignMemory(session::KernelGraph *graph) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
-  bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
-  bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
+  void AssignMemory(const session::KernelGraph &graph) override;
+  bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
+  bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
+  bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
   DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kGPU; }
   std::shared_ptr<DeviceEvent> CreateDeviceEvent() override;
   void *compute_stream() const override { return stream_; }
 
@@ -46,12 +46,11 @@ constexpr float kMaxMemReuseFactor = 0.8;
 constexpr float kMinMemReuseFactor = 0.5;
 constexpr float kRetryFactor = 0.1;
 namespace {
-std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto graph_inputs = graph->inputs();
+std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph &graph) {
+  auto graph_inputs = graph.inputs();
   std::vector<AnfNodePtr> result(graph_inputs.begin(), graph_inputs.end());
   std::set<AnfNodePtr> inputs_set(graph_inputs.begin(), graph_inputs.end());
-  auto kernels = graph->execution_order();
+  auto kernels = graph.execution_order();
   for (auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
     auto input_num = AnfAlgo::GetInputTensorNum(kernel);
@@ -71,9 +70,9 @@ std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
 constexpr size_t kMinInputSize = 2;
 KernelRuntime::~KernelRuntime() {}
 
-bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
+bool KernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) { return true; }
 
-bool KernelRuntime::LoadData(session::KernelGraph *) { return false; }
+bool KernelRuntime::LoadData(const session::KernelGraph &) { return false; }
 
 bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
   MS_EXCEPTION_IF_NULL(kernel);
@@ -85,7 +84,7 @@ bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_
   return false;
 }
 
-void KernelRuntime::AssignMemory(session::KernelGraph *graph) {
+void KernelRuntime::AssignMemory(const session::KernelGraph &graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
@@ -262,9 +261,8 @@ void KernelRuntime::RunOpMallocPre(const session::KernelGraph &graph,
   }
 }
 
-void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
-  auto kernels = kernel_graph->execution_order();
+void KernelRuntime::ResetNodeAddress(const session::KernelGraph &kernel_graph) {
+  auto kernels = kernel_graph.execution_order();
   for (auto &kernel : kernels) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
     MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -303,39 +301,38 @@ void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
   }
 }
 
-void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph,
+void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors,
+                                      const session::KernelGraph &graph,
                                       const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
-  MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(mem_manager_);
   mem_manager_->ResetDynamicMemory();
 
-  for (const auto &node : graph->execution_order()) {
+  for (const auto &node : graph.execution_order()) {
     RunOpAssignCommunicationOutput(node);
     RunOpAssignCommunicationInput(node);
   }
 
   RunOpAssignInputMemory(input_tensors, graph);
   AssignStaticMemoryValueNode(graph);
-  for (const auto &node : graph->execution_order()) {
+  for (const auto &node : graph.execution_order()) {
     RunOpAssignOutputMemory(node, tensor_to_node);
     RunOpAssignWorkSpaceMemory(node);
   }
   UpdateRefNodeOutputMem(graph);
 }
 
-void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) const {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::RunOpClearMemory(const session::KernelGraph &graph) const {
   // clear input parameter memory resource
-  for (const auto &input_node : graph->inputs()) {
+  for (const auto &input_node : graph.inputs()) {
     MS_EXCEPTION_IF_NULL(input_node);
     AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get());
   }
   // clear input value node memory resource
-  for (const auto &value_node : graph->graph_value_nodes()) {
+  for (const auto &value_node : graph.graph_value_nodes()) {
     MS_EXCEPTION_IF_NULL(value_node);
     AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get());
   }
-  for (const auto &cnode : graph->execution_order()) {
+  for (const auto &cnode : graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(cnode);
     // clear output memory resource
     size_t output_num = AnfAlgo::GetOutputTensorNum(cnode);
@@ -372,23 +369,22 @@ bool KernelRuntime::DumpDataEnabledIteration() {
 }
 #endif
 
-void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
+void KernelRuntime::AssignStaticMemory(const session::KernelGraph &graph) {
   AssignStaticMemoryInput(graph);
   AssignStaticMemoryValueNode(graph);
   AssignStaticMemoryOutput(graph);
 }
 
 void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors,
-                                           const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                           const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
-  if (input_tensors.size() != graph->inputs().size()) {
+  if (input_tensors.size() != graph.inputs().size()) {
     MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors.size()
-                      << " should be equal to graph input parameter size " << graph->inputs().size();
+                      << " should be equal to graph input parameter size " << graph.inputs().size();
   }
 
-  for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) {
-    auto item = graph->inputs()[input_index];
+  for (size_t input_index = 0; input_index < graph.inputs().size(); ++input_index) {
+    auto item = graph.inputs()[input_index];
     MS_EXCEPTION_IF_NULL(item);
     if (!item->isa<Parameter>()) {
       continue;
@@ -400,7 +396,9 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
     auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(current_tensor->device_address());
     if (output_address != nullptr && output_address->DeviceType() == GetTargetDeviceAddressType()) {
       if (output_address->ptr_ == nullptr) {
-        mem_manager_->MallocMemFromMemPool(output_address, output_address->size());
+        if (!mem_manager_->MallocMemFromMemPool(output_address, output_address->size())) {
+          MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << output_address->size();
+        }
       }
 
       AnfAlgo::SetOutputAddr(output_address, index, item.get());
@@ -448,7 +446,9 @@ void KernelRuntime::RunOpAssignOutputMemory(
       MS_EXCEPTION_IF_NULL(address);
       if (address->ptr() == nullptr) {
         MS_EXCEPTION_IF_NULL(mem_manager_);
-        mem_manager_->MallocMemFromMemPool(address, address->size());
+        if (!mem_manager_->MallocMemFromMemPool(address, address->size())) {
+          MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << address->size();
+        }
       }
       continue;
     }
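The two hunks above also harden allocation: MallocMemFromMemPool's boolean result was previously ignored, and a failed pool allocation could let a null device pointer flow onward; now failure raises immediately. The shape of the change, on hypothetical stand-ins for the pool allocator:

#include <cstddef>
#include <stdexcept>
#include <string>

// Hypothetical allocator stand-in; returns false when the pool is exhausted.
bool MallocMemFromMemPool(void **ptr, size_t size) {
  *ptr = (size < (1u << 20)) ? ::operator new(size) : nullptr;
  return *ptr != nullptr;
}

void AssignOutput(size_t size) {
  void *addr = nullptr;
  // Before: MallocMemFromMemPool(&addr, size);  // result ignored
  // After: fail loudly at the allocation site.
  if (!MallocMemFromMemPool(&addr, size)) {
    throw std::runtime_error("Allocate memory failed, size: " + std::to_string(size));
  }
  ::operator delete(addr);
}

int main() {
  AssignOutput(1024);  // succeeds in this sketch
  return 0;
}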
|
@ -489,14 +489,13 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
|
|||
}
|
||||
}
|
||||
|
||||
void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph) {
|
||||
void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph) {
|
||||
if (pre_output_value == nullptr) {
|
||||
return;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> pre_output_tensors;
|
||||
TensorValueToTensor(pre_output_value, &pre_output_tensors);
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
auto output_nodes = graph->outputs();
|
||||
auto output_nodes = graph.outputs();
|
||||
if (pre_output_tensors.size() != output_nodes.size()) {
|
||||
MS_LOG(EXCEPTION) << "The size of pre output tensors [" << pre_output_tensors.size()
|
||||
<< "] is not equal to the size of output nodes of graph [" << output_nodes.size() << "]";
|
||||
|
@ -536,13 +535,12 @@ void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value
|
|||
}
|
||||
}
|
||||
|
||||
void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
|
||||
MS_EXCEPTION_IF_NULL(mem_manager_);
|
||||
MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph->graph_id();
|
||||
MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph.graph_id();
|
||||
auto graph_inputs = GetGraphInputs(graph);
|
||||
auto graph_valid_input = graph->valid_inputs();
|
||||
graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end());
|
||||
auto graph_valid_input = graph.valid_inputs();
|
||||
graph_inputs.insert(graph_inputs.end(), graph.child_graph_result().begin(), graph.child_graph_result().end());
|
||||
std::vector<AnfNodePtr> need_alloc_nodes;
|
||||
auto add_need_alloc_nodes = [&need_alloc_nodes, graph, this](const AnfNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
|
@ -553,7 +551,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
|
|||
return;
|
||||
}
|
||||
auto input_param = node->cast<ParameterPtr>();
|
||||
if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph->graph_id())) {
|
||||
if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph.graph_id())) {
|
||||
return;
|
||||
}
|
||||
need_alloc_nodes.push_back(node);
|
||||
|
@ -611,7 +609,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
|
|||
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id, {item, index});
|
||||
MS_LOG(INFO) << "Assign Static Memory for Input node, size:" << tensor_size
|
||||
<< " node:" << item->fullname_with_scope() << " index: " << index;
|
||||
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) {
|
||||
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph.graph_id()) == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
|
||||
}
|
||||
AnfAlgo::SetOutputAddr(device_address, index, item.get());
|
||||
|
@@ -620,10 +618,9 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
   MS_LOG(INFO) << "AssignStaticMemoryInput end";
 }
 
-void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  MS_LOG(INFO) << "AssignStaticMemoryOutput start for graph " << graph->graph_id();
-  auto nodes = AnfAlgo::GetAllOutput(graph->output(), {prim::kPrimTupleGetItem});
+void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph &graph) {
+  MS_LOG(INFO) << "AssignStaticMemoryOutput start for graph " << graph.graph_id();
+  auto nodes = AnfAlgo::GetAllOutput(graph.output(), {prim::kPrimTupleGetItem});
   std::vector<session::KernelWithIndex> non_communication_op;
   // Assign Communicate Op Memory firstly.
   for (const auto &node : nodes) {
@@ -647,9 +644,8 @@ void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph)
   MS_LOG(INFO) << "AssignStaticMemoryOutput end";
 }
 
-void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto &kernels = graph->execution_order();
+void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph &graph) {
+  auto &kernels = graph.execution_order();
   for (auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
     auto output_num = AnfAlgo::GetOutputTensorNum(kernel);
@@ -659,8 +655,8 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) {
     }
     for (size_t i = 0; i < output_num; ++i) {
       session::AnfWithOutIndex out_pair(kernel, i);
-      if (graph->IsInRefOutputMap(out_pair)) {
-        auto origin_pair = graph->GetRefCorrespondOutput(out_pair);
+      if (graph.IsInRefOutputMap(out_pair)) {
+        auto origin_pair = graph.GetRefCorrespondOutput(out_pair);
         MS_EXCEPTION_IF_NULL(origin_pair.first);
         auto origin_node_output_addr = AnfAlgo::GetMutableOutputAddr(origin_pair.first, origin_pair.second);
         MS_EXCEPTION_IF_NULL(origin_node_output_addr);
@@ -682,10 +678,9 @@ void KernelRuntime::AssignCommunicationNodeMem(MemType type, const AnfNodePtr &n
   AssignWorkSpaceMem(type, node);
 }
 
-void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  auto &kernels = graph->execution_order();
-  if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) {
+void KernelRuntime::GenKernelEvents(const session::KernelGraph &graph) {
+  auto &kernels = graph.execution_order();
+  if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) {
     return;
   }
   auto kernel_events =
@@ -736,7 +731,7 @@ void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
       kernel_post_run_events[i].emplace_back([post_event]() { post_event->WaitEvent(); });
     }
   }
-  graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events);
+  graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events);
 }
 
 void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node) {
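GenKernelEvents builds the pre-run and post-run event lists once per graph and memoizes them in graph_kernel_events_map_ keyed by graph.graph_id(); the early return above guards against rebuilding. A compact sketch of that memoization shape (EventCache and its types are invented and much simplified):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>
#include <vector>

using EventList = std::vector<std::function<void()>>;
using KernelEvents = std::pair<EventList, EventList>;  // pre-run, post-run

class EventCache {
 public:
  void GenEvents(uint32_t graph_id, size_t kernel_count) {
    // Mirror the guard: nothing to do, or already generated for this graph.
    if (kernel_count == 0 || cache_.find(graph_id) != cache_.end()) {
      return;
    }
    KernelEvents events;
    events.first.resize(kernel_count);   // one pre-run slot per kernel
    events.second.resize(kernel_count);  // one post-run slot per kernel
    cache_[graph_id] = std::move(events);  // moved in, as with kernel_events
  }

 private:
  std::unordered_map<uint32_t, KernelEvents> cache_;
};

int main() {
  EventCache cache;
  cache.GenEvents(1, 4);
  cache.GenEvents(1, 4);  // second call hits the cache and is a no-op
  return 0;
}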
@@ -989,15 +984,14 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
   }
 }
 
-void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::AssignStaticMemoryValueNode(const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
-  MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph->graph_id();
+  MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph.graph_id();
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   // order the value nodes
   std::map<std::string, ValueNodePtr> value_nodes_map;
-  for (auto &node : graph->graph_value_nodes()) {
+  for (auto &node : graph.graph_value_nodes()) {
     MS_EXCEPTION_IF_NULL(node);
     value_nodes_map[node->fullname_with_scope()] = node;
   }
@@ -1007,22 +1001,18 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
     MS_EXCEPTION_IF_NULL(value_node);
     if (NodeOutputDeviceAddressExist(value_node, 0)) {
       MS_LOG(DEBUG) << "value_node[" << value_node->DebugString() << "] address already exist";
-
-      // TODO(jojo): PyNaitve Infer ?
       auto device_address = AnfAlgo::GetMutableOutputAddr(value_node, 0);
       if (device_address->ptr_ == nullptr) {
         if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER)) {
           if (!mem_manager_->MallocMemFromMemPool(device_address, device_address->size_)) {
             MS_LOG(EXCEPTION) << "MallocMemFromMemPool failed";
           }
-
         } else {
-          if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph->graph_id())) {
+          if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph.graph_id())) {
             MS_LOG(EXCEPTION) << "MallocMem kStaticMem failed";
           }
         }
       }
-
      continue;
    }
    auto &node_value = value_node->value();
@@ -1042,7 +1032,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
     } else {
       MS_LOG(INFO) << "Assign Static Memory for Value node, size:" << tensor_size
                    << " node:" << value_node->fullname_with_scope();
-      if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
+      if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph.graph_id()) == nullptr) {
         MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem
                           << ", tensor size is: " << tensor_size;
       }
@@ -1057,8 +1047,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
   MS_LOG(DEBUG) << "AssignStaticMemoryValueNode end";
 }
 
-void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::AssignDynamicMemory(const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_manager_);
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -1078,7 +1067,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
   } else {
     MS_LOG(INFO) << "Memory Reuse is disable...";
   }
-  auto &execution_nodes = graph->execution_order();
+  auto &execution_nodes = graph.execution_order();
   std::vector<CNodePtr> compute_nodes;
   // communication nodes first
   for (auto &node : execution_nodes) {
@@ -1338,17 +1327,16 @@ void KernelRuntime::AssignKernelAddress(const std::shared_ptr<MemScheduler> &mem
 }
 
 void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
-                                          const session::KernelGraph *graph, const AnfNodePtr &kernel, bool mock) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                          const session::KernelGraph &graph, const AnfNodePtr &kernel, bool mock) {
   MS_EXCEPTION_IF_NULL(mem_scheduler);
   MS_EXCEPTION_IF_NULL(kernel);
   auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   for (size_t j = 0; j < kernel_mod->GetOutputSizeList().size(); ++j) {
-    auto tensor = graph->GetNodeOutputTensor(std::make_pair(kernel, j));
+    auto tensor = graph.GetNodeOutputTensor(std::make_pair(kernel, j));
     auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, j, true);
     if (mock) {
-      if (graph->IsInternalOutput(kernel, j) && device_address != nullptr) {
+      if (graph.IsInternalOutput(kernel, j) && device_address != nullptr) {
         mem_scheduler->SetMemPriority(device_address.get(), kMemPriorityHigh);
       }
       continue;
@@ -1377,11 +1365,10 @@ void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &m
 }
 
 void KernelRuntime::InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
-                                          const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+                                          const session::KernelGraph &graph) {
   MS_EXCEPTION_IF_NULL(mem_scheduler);
-  auto &input_nodes = graph->input_nodes();
-  auto &input_tensors = graph->input_tensors();
+  auto &input_nodes = graph.input_nodes();
+  auto &input_tensors = graph.input_tensors();
   if (input_tensors.size() != input_nodes.size()) {
     MS_LOG_EXCEPTION << "Invalid input tensor size:" << input_tensors.size() << " vs node size:" << input_nodes.size();
   }
@@ -1407,9 +1394,8 @@ void KernelRuntime::InitGraphInputTensors(const std::shared_ptr<MemScheduler> &m
   }
 }
 
-bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel,
+bool KernelRuntime::LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel,
                                  const std::shared_ptr<MemScheduler> &mem_scheduler, bool mock) {
-  MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(kernel);
   auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
   MS_EXCEPTION_IF_NULL(kernel_mod);
@@ -1456,21 +1442,21 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNod
   return ret;
 }
 
-bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock) {
+bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph, bool mock) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   std::shared_ptr<MemScheduler> mem_scheduler = nullptr;
   auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
   if (enable_mem_scheduler) {
-    mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id());
+    mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id());
     MS_EXCEPTION_IF_NULL(mem_scheduler);
     mem_scheduler->SetMemHandler(mem_manager_);
     mem_scheduler->RecordMemUsage();
     InitGraphInputTensors(mem_scheduler, graph);
   }
-  const auto &kernels = graph->execution_order();
+  const auto &kernels = graph.execution_order();
   std::vector<DynamicKernelPtr> dynamic_kernel_list;
-  auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
+  auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
   if (iter != graph_dynamic_kernel_map_.end()) {
     dynamic_kernel_list = iter->second;
   }
@@ -1480,7 +1466,7 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock
   }
   std::vector<std::vector<std::function<void()>>> kernel_pre_run_events;
   std::vector<std::vector<std::function<void()>>> kernel_post_run_events;
-  auto events_iter = graph_kernel_events_map_.find(graph->graph_id());
+  auto events_iter = graph_kernel_events_map_.find(graph.graph_id());
   if (events_iter != graph_kernel_events_map_.end()) {
     kernel_pre_run_events = events_iter->second.first;
     kernel_post_run_events = events_iter->second.second;
@@ -1528,13 +1514,12 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock
   return true;
 }
 
-void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph &graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
   if (enable_mem_scheduler) {
-    auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id());
+    auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id());
     if (mem_scheduler->need_record_event()) {
       (void)LaunchKernelMod(graph, true);
     }
@@ -1551,8 +1536,7 @@ void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) {
   }
 }
 
-bool KernelRuntime::LaunchKernels(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+bool KernelRuntime::LaunchKernels(const session::KernelGraph &graph) {
   UseMemSchedulerIfNeeded(graph);
   if (!LaunchKernelMod(graph)) {
     MS_LOG(ERROR) << "LaunchKernelMod failed!";
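UseMemSchedulerIfNeeded first drives the whole graph through LaunchKernelMod(graph, true): a mock pass in which kernels are not actually launched but their memory usage is recorded (see RecordMemUsage above), and only then does the real run happen. A stripped-down sketch of that record-then-replay flow, with all types invented for illustration:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Kernel { size_t mem_bytes; };

class Runner {
 public:
  bool Launch(const std::vector<Kernel> &kernels, bool mock) {
    for (const auto &k : kernels) {
      if (mock) {
        recorded_.push_back(k.mem_bytes);  // record usage, skip execution
        continue;
      }
      std::printf("launch kernel, mem=%zu\n", k.mem_bytes);  // real launch
    }
    return true;
  }
  // Analogue of mem_scheduler->need_record_event(): true until a mock pass ran.
  bool NeedRecord() const { return recorded_.empty(); }

 private:
  std::vector<size_t> recorded_;
};

int main() {
  Runner runner;
  std::vector<Kernel> graph = {{64}, {128}};
  if (runner.NeedRecord()) {
    (void)runner.Launch(graph, /*mock=*/true);  // dry run to record events
  }
  return runner.Launch(graph, /*mock=*/false) ? 0 : 1;  // real launch
}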
@@ -1574,11 +1558,10 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
 }
 
 #if ((defined ENABLE_CPU) && (!defined _WIN32))
-void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph *graph,
+void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph &graph,
                                              AnfNodePtr *const first_cache_input_index,
                                              size_t *const first_cache_size) {
-  MS_EXCEPTION_IF_NULL(graph);
-  for (const auto &kernel : graph->execution_order()) {
+  for (const auto &kernel : graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(kernel);
     auto kernel_name = AnfAlgo::GetCNodeName(kernel);
     if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
@@ -1647,13 +1630,12 @@ void KernelRuntime::CheckSparsePSEmbeddingCache(const CNodePtr &node) {
   }
 }
 
-void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph) {
   AnfNodePtr first_cache_input_index = nullptr;
   size_t first_cache_size = 0;
   GetFirstPSEmbeddingCache(graph, &first_cache_input_index, &first_cache_size);
   MS_EXCEPTION_IF_NULL(first_cache_input_index);
-  for (const auto &kernel : graph->execution_order()) {
+  for (const auto &kernel : graph.execution_order()) {
     MS_EXCEPTION_IF_NULL(kernel);
     auto kernel_name = AnfAlgo::GetCNodeName(kernel);
     if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
@@ -53,25 +53,26 @@ class KernelRuntime {
   KernelRuntime() = default;
   virtual ~KernelRuntime();
   virtual bool Init() = 0;
-  virtual void AssignMemory(session::KernelGraph *graph);
-  void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph,
+  virtual void AssignMemory(const session::KernelGraph &graph);
+  void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph &graph,
                          const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node = {});
   void RunOpAssignCommunicationOutput(const AnfNodePtr &node) const;
   void RunOpAssignCommunicationInput(const AnfNodePtr &node) const;
-  void RunOpClearMemory(const session::KernelGraph *graph) const;
+  void RunOpClearMemory(const session::KernelGraph &graph) const;
   void RunOpMallocPre(const session::KernelGraph &graph, const std::vector<tensor::TensorPtr> &input_tensors);
 #ifdef ENABLE_DEBUGGER
   static bool DumpDataEnabled();
   static bool DumpDataEnabledIteration();
 #endif
-  virtual bool LoadData(session::KernelGraph *graph);
-  virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
-  virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0;
-  virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
-  virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
-  bool LaunchKernels(const session::KernelGraph *graph);
-  virtual void AssignStaticMemoryInput(const session::KernelGraph *graph);
-  virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph);
+  virtual bool LoadData(const session::KernelGraph &graph);
+  virtual bool Load(const session::KernelGraph &graph, bool is_task_sink);
+  virtual bool Run(const session::KernelGraph &graph, bool is_task_sink) = 0;
+  virtual bool GenDynamicKernel(const session::KernelGraph &graph) = 0;
+  virtual bool RunDynamicKernelAsync(const session::KernelGraph &graph) = 0;
+  bool LaunchKernels(const session::KernelGraph &graph);
+  virtual void AssignStaticMemoryInput(const session::KernelGraph &graph);
+  virtual void AssignStaticMemoryValueNode(const session::KernelGraph &graph);
+
   virtual void ClearGraphRuntimeResource(uint32_t graph_id);
   virtual bool SyncStream() = 0;
   virtual bool MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) = 0;
@@ -107,13 +108,13 @@ class KernelRuntime {
   virtual void PreInit() {}
 #endif
   virtual uint64_t GetAvailableMemMaxSize() const { return 0; }
-  virtual void GenKernelEvents(const session::KernelGraph *graph);
+  virtual void GenKernelEvents(const session::KernelGraph &graph);
   virtual std::shared_ptr<DeviceEvent> CreateDeviceEvent() { return nullptr; }
   virtual std::shared_ptr<DeviceEvent> CreateDeviceTimeEvent() { return nullptr; }
   virtual DeviceAddressType GetTargetDeviceAddressType() const = 0;
   virtual void *compute_stream() const { return nullptr; }
   virtual void *communication_stream() const { return nullptr; }
-  void UpdateRefNodeOutputMem(const session::KernelGraph *graph);
+  void UpdateRefNodeOutputMem(const session::KernelGraph &graph);
   virtual DeviceAddressPtr AssignExtraStaticMem(const TensorPtr &tensor, const AnfNodePtr &node, size_t index);
   virtual void *GetModelStream(uint32_t graph_id) const { return nullptr; }
 
@@ -125,8 +126,8 @@ class KernelRuntime {
   virtual bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index);
   virtual bool KernelMemNotReuse(const AnfNodePtr &node);
 
-  void AssignStaticMemory(session::KernelGraph *graph);
-  void AssignDynamicMemory(session::KernelGraph *graph);
+  void AssignStaticMemory(const session::KernelGraph &graph);
+  void AssignDynamicMemory(const session::KernelGraph &graph);
   void AssignNodeOutputMem(MemType type, const AnfNodePtr &node, int index);
   void AssignWorkSpaceMem(MemType type, const AnfNodePtr &node);
 
@@ -141,35 +142,35 @@ class KernelRuntime {
   virtual void KernelLaunchProfiling(const std::string &kernel_name) {}
 
  private:
-  void UseMemSchedulerIfNeeded(const session::KernelGraph *graph);
-  bool LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel,
+  void UseMemSchedulerIfNeeded(const session::KernelGraph &graph);
+  bool LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel,
                     const std::shared_ptr<MemScheduler> &mem_scheduler, bool mock = false);
-  void ResetNodeAddress(session::KernelGraph *graph);
+  void ResetNodeAddress(const session::KernelGraph &graph);
   void AssignKernelAddress(const std::shared_ptr<MemScheduler> &mem_scheduler, const AnfNodePtr &kernel,
                            AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces,
                            AddressPtrList *kernel_outputs);
   static void GetOrMallocAddress(const std::shared_ptr<MemScheduler> &mem_scheduler,
                                  const DeviceAddress *device_address, const kernel::AddressPtr &kernel_addr);
-  void InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph *graph);
-  void SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph *graph,
+  void InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph &graph);
+  void SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph &graph,
                              const AnfNodePtr &kernel, bool mock);
-  void AssignStaticMemoryOutput(const session::KernelGraph *graph);
-  bool LaunchKernelMod(const session::KernelGraph *graph, bool mock = false);
+  void AssignStaticMemoryOutput(const session::KernelGraph &graph);
+  bool LaunchKernelMod(const session::KernelGraph &graph, bool mock = false);
   void LaunchKernelEvent(const std::vector<std::vector<std::function<void()>>> &run_events, size_t index) const;
   void DebugStreamSync(const CNodePtr &kernel);
   static void GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList *kernel_inputs,
                                      const std::shared_ptr<MemScheduler> &mem_schedule = nullptr);
-  void RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph *graph);
+  void RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph &graph);
   void RunOpAssignOutputMemory(const AnfNodePtr &kernel,
                                const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node = {});
   void RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel);
-  void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph);
+  void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph);
   void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx);
   DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const;
 #if ((defined ENABLE_CPU) && (!defined _WIN32))
-  void GetFirstPSEmbeddingCache(const session::KernelGraph *graph, AnfNodePtr *const first_cache_input_index,
+  void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index,
                                 size_t *const first_cache_size);
-  void CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph);
+  void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph);
   void CheckSparsePSEmbeddingCache(const CNodePtr &node);
 #endif
   void RunOpGetCommunicationInputInfo(const AnfNodePtr &node, size_t *total_size,
@@ -35,18 +35,17 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) {
   return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize;
 }
 
-void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
-  MS_EXCEPTION_IF_NULL(graph);
+void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
   SomasPtr somas_reuse_util_ptr = std::make_shared<somas::Somas>();
   MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr);
   somas_reuse_util_ptr_ = somas_reuse_util_ptr;
 
-  if (!(somas_reuse_util_ptr->Allocate(graph))) {
+  if (!(somas_reuse_util_ptr->Allocate(&graph))) {
     MS_LOG(EXCEPTION) << "Somas Allocate Failed.";
   }
 
   size_t total_allocated_size = somas_reuse_util_ptr->GetTotalMemSize();
-  MS_LOG(INFO) << "Graph " << graph->graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]";
+  MS_LOG(INFO) << "Graph " << graph.graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]";
   if (total_allocated_size > 0) {
     auto base_ptr = MallocDynamicMem(total_allocated_size, false);
     MS_LOG(INFO) << "Somas Reuse Memory Base Address [" << static_cast<void *>(base_ptr) << "], End Address ["
@@ -59,18 +58,18 @@ void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
 #ifdef ENABLE_DUMP_IR
   SubModuleId module = SubModuleId::SM_OPTIMIZER;
 
-  std::string name = "somas_allocate_info." + std::to_string(graph->graph_id());
+  std::string name = "somas_allocate_info." + std::to_string(graph.graph_id());
   (void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasInfo());
 
-  name = "somas_mem_info." + std::to_string(graph->graph_id());
+  name = "somas_mem_info." + std::to_string(graph.graph_id());
   (void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasMemory());
 #endif
   bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
   if (save_graphs) {
-    std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph->graph_id()) + ".ir");
+    std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph.graph_id()) + ".ir");
     somas_reuse_util_ptr_->DumpSomasInfoIR(file_path);
 
-    std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir");
+    std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph.graph_id()) + ".ir");
    somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path);
   }
 }
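Two things worth noting in the MallocSomasDynamicMem hunks. First, `Allocate(&graph)` shows the seam of the refactor: the Somas API still takes a pointer, so the now-reference parameter is adapted with `&` at the boundary rather than changing Somas too. Second, the surrounding logic is the SOMAS idea itself: the reuse solver produces one total size, the runtime allocates a single dynamic block, and tensor addresses are carved out as base plus per-tensor offsets. A toy sketch of that carving, with an invented plan type and made-up numbers:

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <vector>

struct SomasPlan {
  size_t total_size;
  std::vector<size_t> offsets;  // per-tensor offsets; overlaps encode reuse
};

int main() {
  // Pretend the solver produced this: tensors 1 and 2 share offset 256.
  SomasPlan plan{1024, {0, 256, 256, 512}};
  auto *base = static_cast<uint8_t *>(std::malloc(plan.total_size));
  if (base == nullptr) {
    return 1;  // allocation failure aborts, much like the EXCEPTION paths above
  }
  std::vector<uint8_t *> addrs;
  addrs.reserve(plan.offsets.size());
  for (size_t off : plan.offsets) {
    addrs.push_back(base + off);  // every tensor address is base + offset
  }
  std::free(base);
  return 0;
}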
@@ -44,7 +44,7 @@ class MemoryManager : public MemHandler {
   }
   virtual void ClearGlobalIdleMem() {}
 
-  virtual void MallocSomasDynamicMem(const session::KernelGraph *graph);
+  virtual void MallocSomasDynamicMem(const session::KernelGraph &graph);
   uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
                            const DeviceAddressPtr &address, bool comm_mem);
   uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size);
@@ -1231,7 +1231,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv
 AbstractBasePtr InferImplTensorCopySlices(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                           const AbstractBasePtrList &args_spec_list) {
   auto &op_name = primitive->name();
-  CheckArgsSize(op_name, args_spec_list, 5);
+  constexpr auto kTensorCopySlicesInputNum = 5;
+  CheckArgsSize(op_name, args_spec_list, kTensorCopySlicesInputNum);
   AbstractTensorPtr input = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
   return std::make_shared<AbstractTensor>(input->element(), input->shape());
 }
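This hunk is a magic-number cleanup: the bare `5` becomes a named `constexpr`, which documents the expected arity at the call site. The same pattern in a self-contained form (the checker here is a stand-in written for illustration, not MindSpore's CheckArgsSize):

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

constexpr size_t kTensorCopySlicesInputNum = 5;

// Illustrative arity check: throw if the argument count does not match.
void CheckArgsSize(const std::string &op, const std::vector<int> &args, size_t expected) {
  if (args.size() != expected) {
    throw std::invalid_argument(op + " expects " + std::to_string(expected) + " inputs");
  }
}

int main() {
  CheckArgsSize("TensorCopySlices", {1, 2, 3, 4, 5}, kTensorCopySlicesInputNum);
  return 0;
}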
@@ -493,7 +493,7 @@ AbstractBasePtr InferImplReduceScatter(const AnalysisEnginePtr &, const Primitiv
   if (tmp_shape.empty()) {
     MS_LOG(EXCEPTION) << "shape size is 0";
   }
-  tmp_shape[0] = IntMulWithOverflowCheck(tmp_shape[0], rank_size);
+  tmp_shape[0] = LongMulWithOverflowCheck(tmp_shape[0], rank_size);
   return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(tmp_shape));
 }
 
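The fix above swaps the 32-bit helper for a 64-bit one, presumably because shape dimensions are held as int64_t and a 32-bit overflow check could miss a wrap in `tmp_shape[0] * rank_size`. One plausible shape for such a helper, assuming GCC/Clang builtins (the real MindSpore implementation may differ):

#include <cstdint>
#include <stdexcept>

int64_t LongMulWithOverflowCheck(int64_t a, int64_t b) {
  int64_t result = 0;
  // __builtin_mul_overflow returns true if a * b wrapped around.
  if (__builtin_mul_overflow(a, b, &result)) {
    throw std::overflow_error("int64 multiplication overflow");
  }
  return result;
}

int main() {
  const int64_t dim = INT64_C(1) << 40;  // large enough to overflow a 32-bit check
  const int64_t rank_size = 8;
  return LongMulWithOverflowCheck(dim, rank_size) > 0 ? 0 : 1;
}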
@@ -20,6 +20,7 @@ from ..operations import _inner_ops as inner
 from .. import functional as F
 from ..composite.multitype_ops.zeros_like_impl import zeros_like
 
+
 @bprop_getters.register(inner.TensorCopySlices)
 def get_bprop_tensor_copy_slices(self):
     """Generate bprop for TensorCopySlices"""