clean codex

caifubi 2021-09-17 14:22:37 +08:00
parent 0f065c87e2
commit 0683b7fd75
27 changed files with 241 additions and 274 deletions

View File

@ -812,8 +812,7 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no
return format;
}
void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list,
const AnfNodePtr &anf_node) {
void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
for (size_t i = 0; i < input_json.size(); i++) {
for (size_t m = 0; m < input_json[i].size(); m++) {
size_t size_i = 1;
@ -840,8 +839,7 @@ void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *inp
}
}
void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list,
const AnfNodePtr &anf_node) {
void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
for (size_t i = 0; i < output_json.size(); i++) {
for (size_t m = 0; m < output_json[i].size(); m++) {
size_t size_i = 1;
@ -878,8 +876,8 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
}
input_size_list->clear();
output_size_list->clear();
GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list, anf_node);
GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list, anf_node);
GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list);
GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list);
return true;
}
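The two helpers above drop an `anf_node` parameter that their bodies never read, and `GetIOSize` stops passing it. A minimal sketch of the same clean-up with hypothetical names (not the MindSpore functions):

```cpp
#include <cstddef>
#include <vector>

// Before: the helper accepted an extra `context` argument that it never used:
//   void GetSizeList(const std::vector<std::vector<std::size_t>> &shapes,
//                    std::vector<std::size_t> *size_list, const void *context);
// After: the unused parameter is removed and every caller is updated in the same change.
void GetSizeList(const std::vector<std::vector<std::size_t>> &shapes, std::vector<std::size_t> *size_list) {
  for (const auto &shape : shapes) {
    std::size_t size = 1;
    for (std::size_t dim : shape) {
      size *= dim;  // element count of one tensor
    }
    size_list->push_back(size);
  }
}

int main() {
  std::vector<std::size_t> sizes;
  GetSizeList({{2, 3}, {4}}, &sizes);  // callers no longer pass the dropped argument
  return sizes.size() == 2 ? 0 : 1;
}
```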

View File

@ -733,8 +733,8 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
if (!enable_mem_scheduler) {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->AssignStaticMemoryInput(child_graph.get());
runtime_instance->AssignStaticMemoryValueNode(child_graph.get());
runtime_instance->AssignStaticMemoryInput(*child_graph);
runtime_instance->AssignStaticMemoryValueNode(*child_graph);
}
}
@ -822,7 +822,7 @@ void AscendSession::BindAddressToTensor(
}
}
void AscendSession::LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
void AscendSession::LaunchFunc(const KernelGraphPtr &graph,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node,
bool is_dynamic_shape, const std::vector<tensor::TensorPtr> &input_tensors) {
// Wait for AllReduce
@ -887,7 +887,7 @@ void AscendSession::PrepareForOutputTensor(const KernelGraphPtr &graph,
// Create DeviceAddress For Output Tensor(contain: Shape, Format, DType)
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
runtime_instance->RunOpMallocPre(*graph, input_tensors);
runtime_instance->UpdateRefNodeOutputMem(graph.get());
runtime_instance->UpdateRefNodeOutputMem(*graph);
// CREATE OUTPUT TENSOR ADDRESS
UpdateOutputs(graph, outputs, input_tensors, tensor_to_node);
}
@ -951,7 +951,7 @@ void AscendSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_inf
auto &task_manager = PynativeTaskManager::GetInstance();
if (!cache_miss && task_manager.QueueEmpty()) {
// Cache match and there are no task in Queue. Just Launch immediately.
LaunchFunc(graph, tensors_mask, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
LaunchFunc(graph, tensor_to_node, op_run_info->is_dynamic_shape, *input_tensors);
} else {
auto run_op_context = std::make_shared<RunOpContext>(graph_info, op_run_info->is_dynamic_shape, graph, tensors_mask,
*input_tensors, tensor_to_node);
@ -1320,7 +1320,7 @@ void AscendSession::BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kerne
}
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
if (!runtime_instance->GenDynamicKernel(kernel_graph.get())) {
if (!runtime_instance->GenDynamicKernel(*kernel_graph)) {
MS_LOG(DEBUG) << "Graph:" << kernel_graph->graph_id() << " failed to generate dynamic kernel!";
}
MS_LOG(DEBUG) << "Finish!";
@ -1460,7 +1460,7 @@ void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
InitMemReuseExecOrder(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->AssignMemory(kernel_graph);
runtime_instance->AssignMemory(*kernel_graph);
MS_LOG(INFO) << "Finish!";
}
@ -1469,7 +1469,7 @@ void AscendSession::RunOpMemoryAlloc(const std::vector<tensor::TensorPtr> &input
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
}
void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &input_tensors,
@ -1478,21 +1478,21 @@ void AscendSession::RunOpMemoryAllocNew(const std::vector<tensor::TensorPtr> &in
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph, tensor_to_node);
runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph, tensor_to_node);
}
void AscendSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
MS_EXCEPTION_IF_NULL(graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->GenKernelEvents(graph);
runtime_instance->GenKernelEvents(*graph);
}
void AscendSession::RunOpMemoryClear(const KernelGraph *kernel_graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->RunOpClearMemory(kernel_graph);
runtime_instance->RunOpClearMemory(*kernel_graph);
}
void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const {
@ -1503,7 +1503,7 @@ void AscendSession::Load(const std::shared_ptr<KernelGraph> &kernel_graph) const
(void)device::KernelAdjust::GetInstance().StepLoadCtrlInputs(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
bool ret_ok = runtime_instance->Load(kernel_graph.get(), is_task_sink);
bool ret_ok = runtime_instance->Load(*kernel_graph, is_task_sink);
if (!ret_ok) {
MS_LOG(EXCEPTION) << "Load task error!";
}
@ -1525,7 +1525,7 @@ void AscendSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph, bo
DumpSetup(kernel_graph);
#endif
}
bool ret_ok = runtime_instance->Run(kernel_graph.get(), is_task_sink);
bool ret_ok = runtime_instance->Run(*kernel_graph, is_task_sink);
if (is_task && is_task_sink) {
#ifndef ENABLE_SECURITY
Dump(kernel_graph);
@ -1599,7 +1599,7 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
(void)runtime_instance->LoadData(kernel_graph.get());
(void)runtime_instance->LoadData(*kernel_graph);
MS_LOG(INFO) << "Finish!";
}
@ -1884,8 +1884,8 @@ void AscendSession::AssignStaticMemory(NotNull<KernelGraphPtr> graph,
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->ClearGlobalIdleMem();
runtime_instance->AssignStaticMemoryInput(graph.get().get());
runtime_instance->AssignStaticMemoryValueNode(graph.get().get());
runtime_instance->AssignStaticMemoryInput(*graph.get());
runtime_instance->AssignStaticMemoryValueNode(*graph.get());
for (auto &child_graph : graph->child_graph_order()) {
AssignStaticMemory(NOT_NULL(child_graph.lock()), memo);
}
@ -1977,8 +1977,7 @@ void AscendSession::ExecuteAllTaskInQueue() {
while (!launch_tasks.empty()) {
auto &launch_task = launch_tasks.front();
const auto &context = launch_task->context();
LaunchFunc(context->graph(), context->tensor_mask(), context->tensor_to_node(), context->is_dynamic_shape(),
context->input_tensors());
LaunchFunc(context->graph(), context->tensor_to_node(), context->is_dynamic_shape(), context->input_tensors());
launch_tasks.pop();
}
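Most of the hunks in this file only touch call sites: arguments move from `graph.get()` (the raw pointer inside a shared_ptr) or a plain `kernel_graph` pointer to the dereferenced `*graph`, matching KernelRuntime methods that now take `const KernelGraph &`. A hedged sketch of that calling convention with hypothetical types (not MindSpore code):

```cpp
#include <iostream>
#include <memory>

struct Graph {
  unsigned graph_id() const { return 42; }
};

// Before: bool Run(const Graph *graph);  // callee had to MS_EXCEPTION_IF_NULL(graph)
// After: a const reference makes "never null" part of the signature.
bool Run(const Graph &graph) {
  std::cout << "running graph " << graph.graph_id() << "\n";
  return true;
}

int main() {
  auto graph = std::make_shared<Graph>();
  // Call sites switch from Run(graph.get()) to Run(*graph); the shared_ptr still owns the object.
  return Run(*graph) ? 0 : 1;
}
```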

View File

@ -153,7 +153,7 @@ class AscendSession : public SessionBasic {
VectorRef *outputs) const;
std::shared_ptr<device::Bucket> CreateBucket(uint32_t bucket_id, uint32_t bucket_size) override;
void LaunchFunc(const KernelGraphPtr &graph, const std::vector<int64_t> &tensors_mask,
void LaunchFunc(const KernelGraphPtr &graph,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node, bool is_dynamic_shape,
const std::vector<tensor::TensorPtr> &input_tensors);
KernelGraphPtr CreateKernelGraph(const GraphInfo &graph_info, OpRunInfo *op_run_info,

View File

@ -209,7 +209,7 @@ void CPUSession::PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_gra
}
void CPUSession::ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {
bool ret = runtime_.Run(kernel_graph.get(), false);
bool ret = runtime_.Run(*kernel_graph, false);
if (!ret) {
MS_LOG(EXCEPTION) << "Run graph failed";
}
@ -291,7 +291,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node);
runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs);
bool ret = runtime_.Run(kernel_graph.get(), false);
bool ret = runtime_.Run(*kernel_graph, false);
if (!ret) {
MS_LOG(EXCEPTION) << "Run Op failed";
}
@ -301,7 +301,7 @@ void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
UpdateOutputAbstract(kernel_graph, op_run_info);
}
SetOutputFlags(*outputs);
runtime_.RunOpClearMemory(kernel_graph.get());
runtime_.RunOpClearMemory(*kernel_graph);
}
void CPUSession::SetKernelInfo(const KernelGraph *kernel_graph) {

View File

@ -248,7 +248,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->AssignMemory(kernel_graph);
runtime_instance->AssignMemory(*kernel_graph);
}
void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input_tensors,
@ -256,21 +256,21 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
runtime_instance->RunOpAssignMemory(input_tensors, *kernel_graph);
}
void GPUSession::RunOpGenKernelEvent(const KernelGraph *graph) const {
MS_EXCEPTION_IF_NULL(graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->GenKernelEvents(graph);
runtime_instance->GenKernelEvents(*graph);
}
void GPUSession::RunOpClearMemory(KernelGraph *kernel_graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
runtime_instance->RunOpClearMemory(kernel_graph);
runtime_instance->RunOpClearMemory(*kernel_graph);
}
namespace {
@ -626,7 +626,7 @@ void GPUSession::UpdateOutputTensors(const VectorRef *outputs,
void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
if (!runtime_instance->Run(kernel_graph.get(), false)) {
if (!runtime_instance->Run(*kernel_graph, false)) {
MS_LOG(EXCEPTION) << "GPU execute graph failed!";
}
}

View File

@ -99,7 +99,7 @@ void DumpJsonParser::Parse() {
std::ifstream json_file(dump_config_file.value());
if (!json_file.is_open()) {
MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."
<< " Errno:" << errno << " ErrInfo:" << strerror(errno);
<< " Errno:" << errno;
}
nlohmann::json j;
@ -586,13 +586,13 @@ bool DumpJsonParser::OutputNeedDump() const {
return input_output_ == kDumpInputAndOutput || input_output_ == kDumpOutputOnly;
}
void DumpJsonParser::UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph) {
void DumpJsonParser::UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph) {
if (!async_dump_enabled_) {
return;
}
MS_LOG(INFO) << "Update async dump kernel list for hccl";
std::map<std::string, uint32_t> update_kernels;
for (const auto &kernel : kernel_graph->execution_order()) {
for (const auto &kernel : kernel_graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
DumpJsonParser::GetInstance().NeedDump(GetKernelNodeName(kernel))) {

View File

@ -61,7 +61,7 @@ class DumpJsonParser {
bool InputNeedDump() const;
bool OutputNeedDump() const;
std::string GetOpOverflowBinPath(uint32_t graph_id) const;
void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph);
void UpdateNeedDumpKernels(const session::KernelGraph &kernel_graph);
void ClearGraph() { graphs_.clear(); }
void SaveGraph(session::KernelGraph *graph) { (void)graphs_.emplace_back(graph); }
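`UpdateNeedDumpKernels` previously took a `NotNull<const KernelGraph *>`; the new declaration uses a plain `const KernelGraph &`, which expresses the same non-null guarantee without the wrapper or the `NOT_NULL(...)` adapter at call sites. An illustrative sketch (hypothetical types, not the real NotNull utility):

```cpp
#include <iostream>

struct KernelGraph {
  unsigned graph_id() const { return 3; }
};

// Before (shape of the old API): void UpdateNeedDumpKernels(NotNull<const KernelGraph *> graph);
// After: the reference already promises a valid object, so no wrapper is needed.
void UpdateNeedDumpKernels(const KernelGraph &graph) {
  std::cout << "update dump kernels for graph " << graph.graph_id() << "\n";
}

int main() {
  KernelGraph graph;
  UpdateNeedDumpKernels(graph);  // no NOT_NULL(&graph) at the call site
  return 0;
}
```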

View File

@ -19,6 +19,7 @@
#include <memory>
#include <utility>
#include <algorithm>
#include <set>
#include "utils/signal_util.h"
#include "runtime/device/ascend/ascend_device_address.h"
#include "runtime/device/ascend/distribute/ascend_collective.h"
@ -372,8 +373,7 @@ bool AscendKernelRuntime::Init() {
return true;
}
bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
bool AscendKernelRuntime::LoadData(const session::KernelGraph &graph) {
#ifdef ENABLE_DEBUGGER
MS_LOG(INFO) << "Start load step";
for (const auto &graph_ptr : debugger_->GetGraphPtrList()) {
@ -412,7 +412,7 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size
return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id, node_index);
}
bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
bool AscendKernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) {
if (!is_task_sink) {
MS_LOG(INFO) << "Graph mode with not task sink";
GenKernelEvents(graph);
@ -428,10 +428,9 @@ bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
return true;
}
bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph &graph) {
MS_LOG(INFO) << "GenDynamicKernel start";
auto cnode_list = graph->execution_order();
auto cnode_list = graph.execution_order();
std::vector<DynamicKernelPtr> dynamic_kernels;
for (const auto &cnode : cnode_list) {
MS_EXCEPTION_IF_NULL(cnode);
@ -445,15 +444,14 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
dynamic_kernel->Initialize();
dynamic_kernels.emplace_back(dynamic_kernel);
}
graph_dynamic_kernel_map_[graph->graph_id()] = std::move(dynamic_kernels);
graph_dynamic_kernel_map_[graph.graph_id()] = std::move(dynamic_kernels);
MS_LOG(INFO) << "GenDynamicKernel end";
return true;
}
bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
bool AscendKernelRuntime::GenTask(const session::KernelGraph &graph) {
SetCurrentContext();
if (graph->is_dynamic_shape()) {
if (graph.is_dynamic_shape()) {
if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE && (ConfigManager::GetInstance().iter_num() > 1)) {
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with dataset_sink_mode.";
}
@ -465,9 +463,9 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel";
return GenDynamicKernel(graph);
}
MS_LOG(INFO) << "GenTask start. GraphId:" << graph->graph_id();
MS_LOG(INFO) << "GenTask start. GraphId:" << graph.graph_id();
#ifndef ENABLE_SECURITY
DumpJsonParser::GetInstance().UpdateNeedDumpKernels(NOT_NULL(graph));
DumpJsonParser::GetInstance().UpdateNeedDumpKernels(graph);
#endif
#ifdef MEM_REUSE_DEBUG
if (!EnvConfigParser::GetInstance().GetSysMemreuse()) {
@ -476,19 +474,19 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
}
#endif
vector<std::shared_ptr<TaskInfo>> task_info_list;
auto anf_node_list = graph->execution_order();
auto anf_node_list = graph.execution_order();
auto task_generator = TaskGenerator();
if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph->graph_id())) {
if (!task_generator.GenTasks(anf_node_list, &task_info_list, graph.graph_id())) {
return false;
}
// Store the task_info_list
auto insert_ret = task_map_.insert(std::make_pair(graph->graph_id(), task_info_list));
auto insert_ret = task_map_.insert(std::make_pair(graph.graph_id(), task_info_list));
if (!insert_ret.second) {
MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session.";
}
// Graph may have no compute node, such TensorAddGrad.
if (task_info_list.empty()) {
MS_LOG(WARNING) << "Graph " << graph->graph_id() << " have no compute node";
MS_LOG(WARNING) << "Graph " << graph.graph_id() << " have no compute node";
return true;
}
AscendStreamAssign &assign_instance = AscendStreamAssign::GetInstance();
@ -500,13 +498,13 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
assign_instance.GetHcomStreams(&force_copy_stream_list);
MS_LOG(INFO) << "Call DavinciModel total stream num:" << resource_manager.get_cur_stream_num()
<< ", total event num:" << resource_manager.get_cur_event_num()
<< ", total label num:" << graph->label_num()
<< ", total label num:" << graph.label_num()
<< ", wait_active_stream_list size:" << wait_active_stream_list.size()
<< ", force_copy_stream_list size:" << force_copy_stream_list.size();
auto model = std::make_shared<ge::model_runner::DavinciModel>(
task_info_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0,
resource_manager.get_cur_stream_num(), graph->label_num(), resource_manager.get_cur_event_num(), 0);
auto ret = graph_model_map_.insert(std::make_pair(graph->graph_id(), model));
resource_manager.get_cur_stream_num(), graph.label_num(), resource_manager.get_cur_event_num(), 0);
auto ret = graph_model_map_.insert(std::make_pair(graph.graph_id(), model));
if (!ret.second) {
MS_LOG(EXCEPTION) << "Duplicate GraphId! Please check in ascend_session.";
}
@ -514,23 +512,22 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
return true;
}
bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
bool AscendKernelRuntime::LoadTask(const session::KernelGraph &graph) {
SetCurrentContext();
if (graph->is_dynamic_shape()) {
if (graph.is_dynamic_shape()) {
MS_LOG(INFO) << "Dynamic Shape Graph Skip Load Task Step";
return true;
}
MS_LOG(INFO) << "LoadTask start. GraphId:" << graph->graph_id();
MS_LOG(INFO) << "LoadTask start. GraphId:" << graph.graph_id();
if (GraphWithEmptyTaskList(graph)) {
MS_LOG(WARNING) << "LoadTask end, task list is empty";
return true;
}
auto model_iter = graph_model_map_.find(graph->graph_id());
auto model_iter = graph_model_map_.find(graph.graph_id());
if (model_iter == graph_model_map_.end()) {
MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph LoadTask without GenTask.";
MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! Graph LoadTask without GenTask.";
return false;
}
@ -540,7 +537,7 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
#ifndef ENABLE_SECURITY
std::function<void *()> model_handle =
std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
DistributeDebugTask(graph, NOT_NULL(model_handle));
#endif
try {
@ -556,9 +553,9 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
if (ProfilingManager::GetInstance().IsProfiling()) {
auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, *graph);
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, graph);
}
LaunchDataDump(graph->graph_id());
LaunchDataDump(graph.graph_id());
#endif
ModelRunner::Instance().LoadModelComplete(model_iter->first);
@ -566,18 +563,18 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
}
#ifndef ENABLE_SECURITY
void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
void AscendKernelRuntime::DistributeDebugTask(const session::KernelGraph &graph,
const NotNull<std::function<void *()>> &model_handle) {
if (!DumpJsonParser::GetInstance().async_dump_enabled()) {
return;
}
MS_LOG(INFO) << "Start Distribute Debug Task";
auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
auto data_dumper = std::make_shared<DataDumper>(&graph, model_handle);
MS_EXCEPTION_IF_NULL(data_dumper);
auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
auto ret = graph_data_dumper_.try_emplace(graph.graph_id(), data_dumper);
data_dumper->OpDebugRegister();
if (!ret.second) {
MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph.graph_id() << " data dumper failed";
}
}
@ -671,8 +668,7 @@ std::string AscendKernelRuntime::GetDumpPath() {
}
#ifndef ENABLE_SECURITY
void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph &graph) {
const std::string path = GetDumpPath();
if (access(path.c_str(), F_OK) == 0) {
if (!DeleteDumpDir(path)) {
@ -697,10 +693,9 @@ void AscendKernelRuntime::DumpTaskExceptionInfo(const session::KernelGraph *grap
}
#endif
bool AscendKernelRuntime::Run(session::KernelGraph *const graph, bool is_task_sink) {
bool AscendKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
const uint64_t kUSecondInSecond = 1000000;
SignalGuard sg(IntHandler);
MS_EXCEPTION_IF_NULL(graph);
bool ret = false;
if (is_task_sink) {
@ -784,10 +779,9 @@ void AscendKernelRuntime::SetKernelModStream(const std::vector<CNodePtr> &kernel
[](const std::pair<void *, size_t> &item) { return item.second; });
}
void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
auto &kernels = graph->execution_order();
if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) {
void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph &graph) {
auto &kernels = graph.execution_order();
if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) {
return;
}
std::vector<size_t> last_stream_nodes;
@ -840,7 +834,7 @@ void AscendKernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
}
}
ProcessBoundaryEvent(kernels, &kernel_post_run_events, last_stream_nodes);
graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events);
graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events);
}
void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
@ -882,12 +876,11 @@ void AscendKernelRuntime::ProcessBoundaryEvent(const std::vector<CNodePtr> &kern
}
}
bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph->graph_id();
auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph &graph) {
MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph.graph_id();
auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
if (iter == graph_dynamic_kernel_map_.end()) {
MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Not Found! Please generator executor first";
MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Not Found! Please generator executor first";
return false;
}
@ -919,16 +912,15 @@ bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *grap
return true;
}
bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
current_graph_ = graph;
bool AscendKernelRuntime::RunTask(const session::KernelGraph &graph) {
current_graph_ = &graph;
SetCurrentContext();
MS_EXCEPTION_IF_NULL(graph);
if (graph->is_dynamic_shape()) {
if (graph.is_dynamic_shape()) {
MS_LOG(INFO) << "Dynamic Shape Graph Run Task Async";
return RunDynamicKernelAsync(graph);
}
MS_LOG(INFO) << "RunTask start. GraphId:" << graph->graph_id();
MS_LOG(INFO) << "RunTask start. GraphId:" << graph.graph_id();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
@ -937,13 +929,13 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
return true;
}
if (!CheckGraphIdValid(graph->graph_id())) {
MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Invalid! Graph RunTask without GenTask.";
if (!CheckGraphIdValid(graph.graph_id())) {
MS_LOG(ERROR) << "GraphId:" << graph.graph_id() << " Invalid! Graph RunTask without GenTask.";
return false;
}
try {
ModelRunner::Instance().RunModel(graph->graph_id());
ModelRunner::Instance().RunModel(graph.graph_id());
} catch (const std::exception &) {
#ifndef ENABLE_SECURITY
DumpTaskExceptionInfo(graph);
@ -1139,9 +1131,8 @@ bool AscendKernelRuntime::DestroyHccl() {
return true;
}
bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph *graph) const {
MS_EXCEPTION_IF_NULL(graph);
auto iter = task_map_.find(graph->graph_id());
bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph &graph) const {
auto iter = task_map_.find(graph.graph_id());
if (iter == task_map_.end()) {
MS_LOG(EXCEPTION) << "Unknown graph ptr";
}
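One detail in `RunTask` above: with the parameter now a reference, `current_graph_` is assigned `&graph`, the address of the caller-owned object. That is safe only while the caller keeps the KernelGraph alive for as long as the runtime may dereference `current_graph_`. A hedged sketch of the idiom (not MindSpore code):

```cpp
#include <iostream>

struct Graph {
  unsigned graph_id() const { return 9; }
};

class Runtime {
 public:
  // Stores the address of a caller-owned graph; the caller must outlive any later use of it.
  bool RunTask(const Graph &graph) {
    current_graph_ = &graph;
    std::cout << "run task for graph " << current_graph_->graph_id() << "\n";
    return true;
  }

 private:
  const Graph *current_graph_ = nullptr;  // non-owning observer
};

int main() {
  Graph graph;  // owned by the caller for the whole run
  Runtime runtime;
  return runtime.RunTask(graph) ? 0 : 1;
}
```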

View File

@ -41,19 +41,19 @@ class AscendKernelRuntime : public KernelRuntime {
AscendKernelRuntime() = default;
~AscendKernelRuntime() override;
bool Init() override;
bool LoadData(session::KernelGraph *graph) override;
bool GenTask(const session::KernelGraph *graph);
void GenKernelEvents(const session::KernelGraph *graph) override;
bool LoadData(const session::KernelGraph &graph) override;
bool GenTask(const session::KernelGraph &graph);
void GenKernelEvents(const session::KernelGraph &graph) override;
void SetKernelModStream(const std::vector<CNodePtr> &kernels, std::vector<size_t> *last_stream_nodes);
void ProcessBoundaryEvent(const std::vector<CNodePtr> &kernels,
std::vector<std::vector<std::function<void()>>> *kernel_run_events,
const std::vector<size_t> &last_stream_nodes);
bool GenDynamicKernel(const session::KernelGraph *graph) override;
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
bool LoadTask(const session::KernelGraph *graph);
bool RunTask(const session::KernelGraph *graph);
bool Load(session::KernelGraph *graph, bool is_task_sink) override;
bool Run(session::KernelGraph *graph, bool is_task_sink) override;
bool GenDynamicKernel(const session::KernelGraph &graph) override;
bool RunDynamicKernelAsync(const session::KernelGraph &graph) override;
bool LoadTask(const session::KernelGraph &graph);
bool RunTask(const session::KernelGraph &graph);
bool Load(const session::KernelGraph &graph, bool is_task_sink) override;
bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
void ClearGraphRuntimeResource(uint32_t graph_id) override;
void ClearGlobalIdleMem() override;
bool SyncStream() override;
@ -91,18 +91,17 @@ class AscendKernelRuntime : public KernelRuntime {
void ClearGraphModelMap();
void ReleaseDeviceRes() override;
bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
bool GraphWithEmptyTaskList(const session::KernelGraph &graph) const;
bool CheckGraphIdValid(GraphId graph_id) const;
#ifndef ENABLE_SECURITY
void DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
const NotNull<std::function<void *()>> &model_handle);
void DistributeDebugTask(const session::KernelGraph &graph, const NotNull<std::function<void *()>> &model_handle);
void LaunchDataDump(GraphId graph_id);
void ReportProfilingData();
#endif
static CNodePtr GetErrorNodeName(uint32_t streamid, uint32_t taskid);
static std::string GetDumpPath();
#ifndef ENABLE_SECURITY
static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
static void DumpTaskExceptionInfo(const session::KernelGraph &graph);
#endif
static void TaskFailCallback(rtExceptionInfo *task_fail_info);
static bool DeleteDumpDir(const std::string &path);
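These declarations track the base-class KernelRuntime interface: because they are marked `override`, the derived signatures must move from `KernelGraph *` to `const KernelGraph &` in the same commit or the build fails. A minimal sketch of that constraint with hypothetical class names:

```cpp
struct Graph {};

struct KernelRuntimeBase {
  virtual ~KernelRuntimeBase() = default;
  virtual bool Run(const Graph &graph, bool is_task_sink) = 0;  // new reference-based signature
};

struct AscendLikeRuntime : KernelRuntimeBase {
  // A leftover `bool Run(Graph *graph, bool)` declaration would now be rejected by `override`.
  bool Run(const Graph &graph, bool is_task_sink) override {
    (void)graph;  // the sketch does no real work
    return is_task_sink;
  }
};

int main() {
  AscendLikeRuntime runtime;
  Graph graph;
  return runtime.Run(graph, true) ? 0 : 1;
}
```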

View File

@ -197,11 +197,11 @@ uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_m
}
}
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
MemoryManager::MallocSomasDynamicMem(graph);
#ifndef ENABLE_SECURITY
if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable()) {
somas_reuse_util_ptr_->ConvertToProfilingNode(graph->graph_id());
somas_reuse_util_ptr_->ConvertToProfilingNode(graph.graph_id());
}
#endif
}

View File

@ -35,7 +35,7 @@ class AscendMemoryManager : public MemoryManager {
void *MallocMemFromMemPool(size_t size) override;
void FreeMemFromMemPool(void *device_ptr) override;
uint64_t GetDeviceMemSize();
void MallocSomasDynamicMem(const session::KernelGraph *graph) override;
void MallocSomasDynamicMem(const session::KernelGraph &graph) override;
uint8_t *MallocCommunicationMemFromMemPool(size_t size) override;
std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) override {
return AscendMemoryPool::GetInstance().AllocContinuousTensorMem(total_size, size_list);

View File

@ -142,10 +142,9 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No
}
auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
auto data_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<int64_t> tmp_shape;
std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
return UpdateShapeAndType(tmp_shape, data_type, NOT_NULL(input_shape_and_type_[input_index]));
return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index]));
}
bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node) {
@ -171,8 +170,7 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const
std::vector<int64_t> tmp_shape;
std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
return UpdateShapeAndType(tmp_shape, AnfAlgo::GetOutputDeviceDataType(anf_node, output_index),
NOT_NULL(output_shape_and_type_[output_index]));
return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index]));
}
bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
@ -182,7 +180,7 @@ bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<s
return true;
}
bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape,
NotNull<AicpuShapeAndType *> shape_and_type) {
if (shape.empty() || shape.size() > kernel::kMaxShapeDims) {
MS_LOG(ERROR) << "Invalid shape:" << shape.size();

View File

@ -65,8 +65,7 @@ class AicpuExtInfoHandler {
bool ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
bool ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
static bool UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
NotNull<AicpuShapeAndType *> shape_and_type);
static bool UpdateShapeAndType(const std::vector<int64_t> &shape, NotNull<AicpuShapeAndType *> shape_and_type);
static void GetShapeAndType(NotNull<const AicpuShapeAndType *> shape_and_type, NotNull<std::vector<int64_t> *> shape,
NotNull<TypeId *> data_type);

View File

@ -227,7 +227,7 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t len) {
return RT_ERROR_NONE;
}
bool ProfilingManager::StopProfiling() {
bool ProfilingManager::StopProfiling() const {
MS_LOG(INFO) << "StopProfiling";
if (!IsProfiling()) {
MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode.";

View File

@ -49,7 +49,7 @@ class ProfilingManager {
bool ReportProfilingData(const map<uint32_t, string> &op_taskId_map) const;
bool ProfRegisterCtrlCallback() const;
bool StartupProfiling(uint32_t device_id);
bool StopProfiling();
bool StopProfiling() const;
inline bool IsProfiling() const {
auto profiler_manager = profiler::ProfilerManager::GetInstance();
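`StopProfiling` gains a `const` qualifier in both the definition and this declaration, documenting that it does not mutate the manager and allowing calls through const references or pointers. A small sketch of what the qualifier permits (hypothetical class, not the real ProfilingManager):

```cpp
#include <iostream>

class ProfilingManagerLike {
 public:
  bool StopProfiling() const {  // const: only reads state, never mutates the manager
    return enabled_;
  }

 private:
  bool enabled_ = false;
};

int main() {
  const ProfilingManagerLike manager{};  // a const instance can still call the const method
  std::cout << std::boolalpha << manager.StopProfiling() << "\n";
  return 0;
}
```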

View File

@ -210,7 +210,7 @@ void ProfilingUtils::GetTraceBpEnd(const session::KernelGraph &kernel_graph, con
if (bp_end_str.empty()) {
trace_info->trace_bp_end = trace_info->trace_iter_end;
} else {
trace_info->trace_bp_end.insert(bp_end_str);
(void)trace_info->trace_bp_end.insert(bp_end_str);
}
}
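The only change here casts the result of `insert` to `void`. Assuming `trace_bp_end` is a `std::set`-style container, `insert` returns a `std::pair<iterator, bool>`, and the explicit cast records that the result is deliberately ignored, which silences unchecked-return-value findings from static analysis. For example:

```cpp
#include <set>
#include <string>

int main() {
  std::set<std::string> trace_bp_end;
  (void)trace_bp_end.insert("bp_end_node");  // result deliberately discarded
  return trace_bp_end.size() == 1 ? 0 : 1;
}
```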

View File

@ -72,7 +72,8 @@ void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) {
if (is_enable_mem_reuse) {
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->ResetDynamicMemory();
AssignDynamicMemory(kernel_graph);
MS_EXCEPTION_IF_NULL(kernel_graph);
AssignDynamicMemory(*kernel_graph);
#ifdef MEM_REUSE_DEBUG
// Get normal graph ir for memreuse
mindspore::memreuse::MemReuseChecker::GetInstance().CheckNormalIR(kernel_graph);
@ -405,16 +406,15 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
static_cast<CPUMemoryManager *>(mem_manager_.get())->DecreaseSummaryRefCount(summary_outputs);
}
bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
MS_EXCEPTION_IF_NULL(kernel_graph);
static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);
bool CPUKernelRuntime::Run(const session::KernelGraph &kernel_graph, bool) {
static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(&kernel_graph);
auto kernels = kernel_graph->execution_order();
auto kernels = kernel_graph.execution_order();
#ifndef ENABLE_SECURITY
auto &dump_json_parser = DumpJsonParser::GetInstance();
bool iter_dump_flag = dump_json_parser.GetIterDumpFlag();
uint32_t graph_id = kernel_graph->graph_id();
uint32_t graph_id = kernel_graph.graph_id();
#endif
#ifdef ENABLE_DUMP_IR
std::string name = "mem_address_list";
@ -490,7 +490,7 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool) {
}
#ifndef ENABLE_SECURITY
if (iter_dump_flag) {
CPUE2eDump::DumpParametersAndConst(kernel_graph, graph_id);
CPUE2eDump::DumpParametersAndConst(&kernel_graph, graph_id);
}
if (graph_id == 0) {
dump_json_parser.UpdateDumpIter();

View File

@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
~CPUKernelRuntime() override = default;
bool Init();
bool Run(session::KernelGraph *graph, bool is_task_sink) override;
bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
void AssignKernelAddress(session::KernelGraph *kernel_graph);
void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
VectorRef *outputs, std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node);
@ -44,8 +44,8 @@ class CPUKernelRuntime : public KernelRuntime {
VectorRef *outputs);
void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kCPU; };
protected:

View File

@ -431,7 +431,7 @@ void GPUKernelRuntime::FetchMemUnitSize(const session::KernelGraph *graph) {
}
}
void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
void GPUKernelRuntime::AssignMemory(const session::KernelGraph &graph) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
MS_EXCEPTION_IF_NULL(mem_manager_);
@ -441,18 +441,17 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
bool is_enable_dynamic_mem = context_ptr->get_param<bool>(MS_CTX_ENABLE_DYNAMIC_MEM_POOL);
if (is_enable_dynamic_mem) {
// Use the dynamic memory pool.
InitKernelRefCount(graph);
InitMemorySwapInfo(graph);
InitKernelOutputAddress(graph);
InitKernelWorkspaceAddress(graph);
SaveGraphOutputNode(graph);
InitKernelRefCount(&graph);
InitMemorySwapInfo(&graph);
InitKernelOutputAddress(&graph);
InitKernelWorkspaceAddress(&graph);
SaveGraphOutputNode(&graph);
} else {
AssignDynamicMemory(graph);
}
}
bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
MS_EXCEPTION_IF_NULL(graph);
bool GPUKernelRuntime::Run(const session::KernelGraph &graph, bool is_task_sink) {
struct timeval start_time, end_time;
(void)gettimeofday(&start_time, nullptr);
bool ret = true;
@ -462,7 +461,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
bool is_enable_pynative_infer = context_ptr->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER);
bool is_pynative_mode = (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode);
if (is_enable_dynamic_mem && !is_pynative_mode && !is_enable_pynative_infer) {
auto graph_id = graph->graph_id();
auto graph_id = graph.graph_id();
auto iter = mem_swap_map_.find(graph_id);
if (iter == mem_swap_map_.end()) {
MS_LOG(EXCEPTION) << "Find memory swap map failed.";
@ -476,11 +475,11 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
mem_reuse_util_ = mem_reuse_iter->second;
MS_EXCEPTION_IF_NULL(mem_reuse_util_);
ret = RunOneStep(graph);
ret = RunOneStep(&graph);
} else {
if (graph->is_dynamic_shape()) {
if (graph.is_dynamic_shape()) {
// run dynamic shape graph in pynative
ret = RunOpLaunchKernelDynamic(graph);
ret = RunOpLaunchKernelDynamic(&graph);
} else {
ret = LaunchKernels(graph);
}
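In this file the public entry points switch to `const KernelGraph &`, while internal helpers such as `InitKernelRefCount`, `RunOneStep`, and `RunOpLaunchKernelDynamic` still take pointers, so the reference is adapted with `&graph` at the call site. A hedged sketch of mixing the two conventions during an incremental migration (hypothetical helpers, not MindSpore code):

```cpp
#include <iostream>

struct Graph {
  unsigned graph_id() const { return 5; }
};

// A not-yet-migrated helper that still expects a pointer.
bool RunOneStep(const Graph *graph) {
  std::cout << "one step on graph " << graph->graph_id() << "\n";
  return true;
}

// The migrated entry point takes a reference and adapts with `&` at the boundary.
bool Run(const Graph &graph) {
  return RunOneStep(&graph);  // reference -> pointer for the older helper
}

int main() {
  Graph graph;
  return Run(graph) ? 0 : 1;
}
```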

View File

@ -43,10 +43,10 @@ class GPUKernelRuntime : public KernelRuntime {
bool Init() override;
void ReleaseDeviceRes() override;
void ClearGraphRuntimeResource(uint32_t graph_id) override;
void AssignMemory(session::KernelGraph *graph) override;
bool Run(session::KernelGraph *graph, bool is_task_sink) override;
bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
void AssignMemory(const session::KernelGraph &graph) override;
bool Run(const session::KernelGraph &graph, bool is_task_sink) override;
bool GenDynamicKernel(const session::KernelGraph &graph) override { return true; }
bool RunDynamicKernelAsync(const session::KernelGraph &graph) override { return true; }
DeviceAddressType GetTargetDeviceAddressType() const override { return DeviceAddressType::kGPU; }
std::shared_ptr<DeviceEvent> CreateDeviceEvent() override;
void *compute_stream() const override { return stream_; }

View File

@ -46,12 +46,11 @@ constexpr float kMaxMemReuseFactor = 0.8;
constexpr float kMinMemReuseFactor = 0.5;
constexpr float kRetryFactor = 0.1;
namespace {
std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
auto graph_inputs = graph->inputs();
std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph &graph) {
auto graph_inputs = graph.inputs();
std::vector<AnfNodePtr> result(graph_inputs.begin(), graph_inputs.end());
std::set<AnfNodePtr> inputs_set(graph_inputs.begin(), graph_inputs.end());
auto kernels = graph->execution_order();
auto kernels = graph.execution_order();
for (auto &kernel : kernels) {
MS_EXCEPTION_IF_NULL(kernel);
auto input_num = AnfAlgo::GetInputTensorNum(kernel);
@ -71,9 +70,9 @@ std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
constexpr size_t kMinInputSize = 2;
KernelRuntime::~KernelRuntime() {}
bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
bool KernelRuntime::Load(const session::KernelGraph &graph, bool is_task_sink) { return true; }
bool KernelRuntime::LoadData(session::KernelGraph *) { return false; }
bool KernelRuntime::LoadData(const session::KernelGraph &) { return false; }
bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
MS_EXCEPTION_IF_NULL(kernel);
@ -85,7 +84,7 @@ bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_
return false;
}
void KernelRuntime::AssignMemory(session::KernelGraph *graph) {
void KernelRuntime::AssignMemory(const session::KernelGraph &graph) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
@ -262,9 +261,8 @@ void KernelRuntime::RunOpMallocPre(const session::KernelGraph &graph,
}
}
void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto kernels = kernel_graph->execution_order();
void KernelRuntime::ResetNodeAddress(const session::KernelGraph &kernel_graph) {
auto kernels = kernel_graph.execution_order();
for (auto &kernel : kernels) {
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
@ -303,39 +301,38 @@ void KernelRuntime::ResetNodeAddress(session::KernelGraph *kernel_graph) {
}
}
void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph,
void KernelRuntime::RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors,
const session::KernelGraph &graph,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->ResetDynamicMemory();
for (const auto &node : graph->execution_order()) {
for (const auto &node : graph.execution_order()) {
RunOpAssignCommunicationOutput(node);
RunOpAssignCommunicationInput(node);
}
RunOpAssignInputMemory(input_tensors, graph);
AssignStaticMemoryValueNode(graph);
for (const auto &node : graph->execution_order()) {
for (const auto &node : graph.execution_order()) {
RunOpAssignOutputMemory(node, tensor_to_node);
RunOpAssignWorkSpaceMemory(node);
}
UpdateRefNodeOutputMem(graph);
}
void KernelRuntime::RunOpClearMemory(const session::KernelGraph *graph) const {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::RunOpClearMemory(const session::KernelGraph &graph) const {
// clear input parameter memory resource
for (const auto &input_node : graph->inputs()) {
for (const auto &input_node : graph.inputs()) {
MS_EXCEPTION_IF_NULL(input_node);
AnfAlgo::SetOutputAddr(nullptr, 0, input_node.get());
}
// clear input value node memory resource
for (const auto &value_node : graph->graph_value_nodes()) {
for (const auto &value_node : graph.graph_value_nodes()) {
MS_EXCEPTION_IF_NULL(value_node);
AnfAlgo::SetOutputAddr(nullptr, 0, value_node.get());
}
for (const auto &cnode : graph->execution_order()) {
for (const auto &cnode : graph.execution_order()) {
MS_EXCEPTION_IF_NULL(cnode);
// clear output memory resource
size_t output_num = AnfAlgo::GetOutputTensorNum(cnode);
@ -372,23 +369,22 @@ bool KernelRuntime::DumpDataEnabledIteration() {
}
#endif
void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) {
void KernelRuntime::AssignStaticMemory(const session::KernelGraph &graph) {
AssignStaticMemoryInput(graph);
AssignStaticMemoryValueNode(graph);
AssignStaticMemoryOutput(graph);
}
void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors,
const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(mem_manager_);
if (input_tensors.size() != graph->inputs().size()) {
if (input_tensors.size() != graph.inputs().size()) {
MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors.size()
<< " should be equal to graph input parameter size " << graph->inputs().size();
<< " should be equal to graph input parameter size " << graph.inputs().size();
}
for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) {
auto item = graph->inputs()[input_index];
for (size_t input_index = 0; input_index < graph.inputs().size(); ++input_index) {
auto item = graph.inputs()[input_index];
MS_EXCEPTION_IF_NULL(item);
if (!item->isa<Parameter>()) {
continue;
@ -400,7 +396,9 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
auto output_address = std::dynamic_pointer_cast<device::DeviceAddress>(current_tensor->device_address());
if (output_address != nullptr && output_address->DeviceType() == GetTargetDeviceAddressType()) {
if (output_address->ptr_ == nullptr) {
mem_manager_->MallocMemFromMemPool(output_address, output_address->size());
if (!mem_manager_->MallocMemFromMemPool(output_address, output_address->size())) {
MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << output_address->size();
}
}
AnfAlgo::SetOutputAddr(output_address, index, item.get());
@ -448,7 +446,9 @@ void KernelRuntime::RunOpAssignOutputMemory(
MS_EXCEPTION_IF_NULL(address);
if (address->ptr() == nullptr) {
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocMemFromMemPool(address, address->size());
if (!mem_manager_->MallocMemFromMemPool(address, address->size())) {
MS_LOG(EXCEPTION) << "Allocate memory failed, size:" << address->size();
}
}
continue;
}
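This hunk and the one before it add a check on `MallocMemFromMemPool`: rather than dropping the boolean result, an allocation failure now raises an exception that includes the requested size. A minimal sketch of the pattern, assuming a pool allocator that reports failure through its return value:

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical pool allocator: returns false when the request cannot be served.
bool MallocFromPool(std::size_t size, void **out_ptr) {
  static char pool[1024];
  if (size > sizeof(pool)) {
    return false;
  }
  *out_ptr = pool;
  return true;
}

void AssignOutputMemory(std::size_t size) {
  void *ptr = nullptr;
  // Before: the boolean result of the allocation call was silently dropped.
  // After: failure is surfaced immediately, together with the size that could not be served.
  if (!MallocFromPool(size, &ptr)) {
    throw std::runtime_error("Allocate memory failed, size:" + std::to_string(size));
  }
  std::cout << "allocated " << size << " bytes at " << ptr << "\n";
}

int main() {
  AssignOutputMemory(256);
  return 0;
}
```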
@ -489,14 +489,13 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
}
}
void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph) {
void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph) {
if (pre_output_value == nullptr) {
return;
}
std::vector<tensor::TensorPtr> pre_output_tensors;
TensorValueToTensor(pre_output_value, &pre_output_tensors);
MS_EXCEPTION_IF_NULL(graph);
auto output_nodes = graph->outputs();
auto output_nodes = graph.outputs();
if (pre_output_tensors.size() != output_nodes.size()) {
MS_LOG(EXCEPTION) << "The size of pre output tensors [" << pre_output_tensors.size()
<< "] is not equal to the size of output nodes of graph [" << output_nodes.size() << "]";
@ -536,13 +535,12 @@ void KernelRuntime::RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value
}
}
void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(mem_manager_);
MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph->graph_id();
MS_LOG(INFO) << "AssignStaticMemoryInput start for graph " << graph.graph_id();
auto graph_inputs = GetGraphInputs(graph);
auto graph_valid_input = graph->valid_inputs();
graph_inputs.insert(graph_inputs.end(), graph->child_graph_result().begin(), graph->child_graph_result().end());
auto graph_valid_input = graph.valid_inputs();
graph_inputs.insert(graph_inputs.end(), graph.child_graph_result().begin(), graph.child_graph_result().end());
std::vector<AnfNodePtr> need_alloc_nodes;
auto add_need_alloc_nodes = [&need_alloc_nodes, graph, this](const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
@ -553,7 +551,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
return;
}
auto input_param = node->cast<ParameterPtr>();
if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph->graph_id())) {
if (input_param != nullptr && !input_param->IsUsedByRealKernelInGraph(graph.graph_id())) {
return;
}
need_alloc_nodes.push_back(node);
@ -611,7 +609,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id, {item, index});
MS_LOG(INFO) << "Assign Static Memory for Input node, size:" << tensor_size
<< " node:" << item->fullname_with_scope() << " index: " << index;
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph->graph_id()) == nullptr) {
if (mem_manager_->MallocMem(kStaticMem, tensor_size, device_address, graph.graph_id()) == nullptr) {
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem << ", tensor size is: " << tensor_size;
}
AnfAlgo::SetOutputAddr(device_address, index, item.get());
@ -620,10 +618,9 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) {
MS_LOG(INFO) << "AssignStaticMemoryInput end";
}
void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
MS_LOG(INFO) << "AssignStaticMemoryOutput start for graph " << graph->graph_id();
auto nodes = AnfAlgo::GetAllOutput(graph->output(), {prim::kPrimTupleGetItem});
void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph &graph) {
MS_LOG(INFO) << "AssignStaticMemoryOutput start for graph " << graph.graph_id();
auto nodes = AnfAlgo::GetAllOutput(graph.output(), {prim::kPrimTupleGetItem});
std::vector<session::KernelWithIndex> non_communication_op;
// Assign Communicate Op Memory firstly.
for (const auto &node : nodes) {
@ -647,9 +644,8 @@ void KernelRuntime::AssignStaticMemoryOutput(const session::KernelGraph *graph)
MS_LOG(INFO) << "AssignStaticMemoryOutput end";
}
void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
auto &kernels = graph->execution_order();
void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph &graph) {
auto &kernels = graph.execution_order();
for (auto &kernel : kernels) {
MS_EXCEPTION_IF_NULL(kernel);
auto output_num = AnfAlgo::GetOutputTensorNum(kernel);
@ -659,8 +655,8 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) {
}
for (size_t i = 0; i < output_num; ++i) {
session::AnfWithOutIndex out_pair(kernel, i);
if (graph->IsInRefOutputMap(out_pair)) {
auto origin_pair = graph->GetRefCorrespondOutput(out_pair);
if (graph.IsInRefOutputMap(out_pair)) {
auto origin_pair = graph.GetRefCorrespondOutput(out_pair);
MS_EXCEPTION_IF_NULL(origin_pair.first);
auto origin_node_output_addr = AnfAlgo::GetMutableOutputAddr(origin_pair.first, origin_pair.second);
MS_EXCEPTION_IF_NULL(origin_node_output_addr);
@ -682,10 +678,9 @@ void KernelRuntime::AssignCommunicationNodeMem(MemType type, const AnfNodePtr &n
AssignWorkSpaceMem(type, node);
}
void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
auto &kernels = graph->execution_order();
if (kernels.empty() || graph_kernel_events_map_.find(graph->graph_id()) != graph_kernel_events_map_.end()) {
void KernelRuntime::GenKernelEvents(const session::KernelGraph &graph) {
auto &kernels = graph.execution_order();
if (kernels.empty() || graph_kernel_events_map_.find(graph.graph_id()) != graph_kernel_events_map_.end()) {
return;
}
auto kernel_events =
@ -736,7 +731,7 @@ void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
kernel_post_run_events[i].emplace_back([post_event]() { post_event->WaitEvent(); });
}
}
graph_kernel_events_map_[graph->graph_id()] = std::move(kernel_events);
graph_kernel_events_map_[graph.graph_id()] = std::move(kernel_events);
}
void KernelRuntime::AssignCommunicationNodeOutputMem(MemType type, const AnfNodePtr &node) {
@ -989,15 +984,14 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
}
}
void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::AssignStaticMemoryValueNode(const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(mem_manager_);
MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph->graph_id();
MS_LOG(DEBUG) << "AssignStaticMemoryValueNode start for graph " << graph.graph_id();
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
// order the value nodes
std::map<std::string, ValueNodePtr> value_nodes_map;
for (auto &node : graph->graph_value_nodes()) {
for (auto &node : graph.graph_value_nodes()) {
MS_EXCEPTION_IF_NULL(node);
value_nodes_map[node->fullname_with_scope()] = node;
}
@ -1007,22 +1001,18 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(value_node);
if (NodeOutputDeviceAddressExist(value_node, 0)) {
MS_LOG(DEBUG) << "value_node[" << value_node->DebugString() << "] address already exist";
// TODO(jojo): PyNaitve Infer ?
auto device_address = AnfAlgo::GetMutableOutputAddr(value_node, 0);
if (device_address->ptr_ == nullptr) {
if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER)) {
if (!mem_manager_->MallocMemFromMemPool(device_address, device_address->size_)) {
MS_LOG(EXCEPTION) << "MallocMemFromMemPool failed";
}
} else {
if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph->graph_id())) {
if (mem_manager_->MallocMem(kStaticMem, device_address->size_, device_address, graph.graph_id())) {
MS_LOG(EXCEPTION) << "MallocMem kStaticMem failed";
}
}
}
continue;
}
auto &node_value = value_node->value();
@ -1042,7 +1032,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
} else {
MS_LOG(INFO) << "Assign Static Memory for Value node, size:" << tensor_size
<< " node:" << value_node->fullname_with_scope();
if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph->graph_id()) == nullptr) {
if (mem_manager_->MallocMem(kStaticMem, tensor_size, address, graph.graph_id()) == nullptr) {
MS_LOG(EXCEPTION) << "Cannot alloc address when flag is: " << kStaticMem
<< ", tensor size is: " << tensor_size;
}
@ -1057,8 +1047,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) {
MS_LOG(DEBUG) << "AssignStaticMemoryValueNode end";
}
void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::AssignDynamicMemory(const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(mem_manager_);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
@ -1078,7 +1067,7 @@ void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) {
} else {
MS_LOG(INFO) << "Memory Reuse is disable...";
}
auto &execution_nodes = graph->execution_order();
auto &execution_nodes = graph.execution_order();
std::vector<CNodePtr> compute_nodes;
// communication nodes first
for (auto &node : execution_nodes) {
@ -1338,17 +1327,16 @@ void KernelRuntime::AssignKernelAddress(const std::shared_ptr<MemScheduler> &mem
}
void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
const session::KernelGraph *graph, const AnfNodePtr &kernel, bool mock) {
MS_EXCEPTION_IF_NULL(graph);
const session::KernelGraph &graph, const AnfNodePtr &kernel, bool mock) {
MS_EXCEPTION_IF_NULL(mem_scheduler);
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
for (size_t j = 0; j < kernel_mod->GetOutputSizeList().size(); ++j) {
auto tensor = graph->GetNodeOutputTensor(std::make_pair(kernel, j));
auto tensor = graph.GetNodeOutputTensor(std::make_pair(kernel, j));
auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, j, true);
if (mock) {
if (graph->IsInternalOutput(kernel, j) && device_address != nullptr) {
if (graph.IsInternalOutput(kernel, j) && device_address != nullptr) {
mem_scheduler->SetMemPriority(device_address.get(), kMemPriorityHigh);
}
continue;
@ -1377,11 +1365,10 @@ void KernelRuntime::SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &m
}
void KernelRuntime::InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler,
const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
const session::KernelGraph &graph) {
MS_EXCEPTION_IF_NULL(mem_scheduler);
auto &input_nodes = graph->input_nodes();
auto &input_tensors = graph->input_tensors();
auto &input_nodes = graph.input_nodes();
auto &input_tensors = graph.input_tensors();
if (input_tensors.size() != input_nodes.size()) {
MS_LOG_EXCEPTION << "Invalid input tensor size:" << input_tensors.size() << " vs node size:" << input_nodes.size();
}
@ -1407,9 +1394,8 @@ void KernelRuntime::InitGraphInputTensors(const std::shared_ptr<MemScheduler> &m
}
}
bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel,
bool KernelRuntime::LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel,
const std::shared_ptr<MemScheduler> &mem_scheduler, bool mock) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
@ -1456,21 +1442,21 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph, const AnfNod
return ret;
}
bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock) {
bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph, bool mock) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
std::shared_ptr<MemScheduler> mem_scheduler = nullptr;
auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
if (enable_mem_scheduler) {
mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id());
mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id());
MS_EXCEPTION_IF_NULL(mem_scheduler);
mem_scheduler->SetMemHandler(mem_manager_);
mem_scheduler->RecordMemUsage();
InitGraphInputTensors(mem_scheduler, graph);
}
const auto &kernels = graph->execution_order();
const auto &kernels = graph.execution_order();
std::vector<DynamicKernelPtr> dynamic_kernel_list;
auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
if (iter != graph_dynamic_kernel_map_.end()) {
dynamic_kernel_list = iter->second;
}
@ -1480,7 +1466,7 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock
}
std::vector<std::vector<std::function<void()>>> kernel_pre_run_events;
std::vector<std::vector<std::function<void()>>> kernel_post_run_events;
auto events_iter = graph_kernel_events_map_.find(graph->graph_id());
auto events_iter = graph_kernel_events_map_.find(graph.graph_id());
if (events_iter != graph_kernel_events_map_.end()) {
kernel_pre_run_events = events_iter->second.first;
kernel_post_run_events = events_iter->second.second;
@ -1528,13 +1514,12 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph *graph, bool mock
return true;
}
void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph &graph) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto enable_mem_scheduler = context_ptr->get_param<bool>(MS_CTX_ENABLE_MEM_SCHEDULER);
if (enable_mem_scheduler) {
auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph->graph_id());
auto mem_scheduler = mem_scheduler_manager_.GetOrCreateMemScheduler(graph.graph_id());
if (mem_scheduler->need_record_event()) {
(void)LaunchKernelMod(graph, true);
}
@ -1551,8 +1536,7 @@ void KernelRuntime::UseMemSchedulerIfNeeded(const session::KernelGraph *graph) {
}
}
bool KernelRuntime::LaunchKernels(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
bool KernelRuntime::LaunchKernels(const session::KernelGraph &graph) {
UseMemSchedulerIfNeeded(graph);
if (!LaunchKernelMod(graph)) {
MS_LOG(ERROR) << "LaunchKernelMod failed!";
@ -1574,11 +1558,10 @@ void KernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
}
#if ((defined ENABLE_CPU) && (!defined _WIN32))
void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph *graph,
void KernelRuntime::GetFirstPSEmbeddingCache(const session::KernelGraph &graph,
AnfNodePtr *const first_cache_input_index,
size_t *const first_cache_size) {
MS_EXCEPTION_IF_NULL(graph);
for (const auto &kernel : graph->execution_order()) {
for (const auto &kernel : graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_name = AnfAlgo::GetCNodeName(kernel);
if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
@ -1647,13 +1630,12 @@ void KernelRuntime::CheckSparsePSEmbeddingCache(const CNodePtr &node) {
}
}
void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void KernelRuntime::CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph) {
AnfNodePtr first_cache_input_index = nullptr;
size_t first_cache_size = 0;
GetFirstPSEmbeddingCache(graph, &first_cache_input_index, &first_cache_size);
MS_EXCEPTION_IF_NULL(first_cache_input_index);
for (const auto &kernel : graph->execution_order()) {
for (const auto &kernel : graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto kernel_name = AnfAlgo::GetCNodeName(kernel);
if (kernel_name != kGatherV2OpName && kernel_name != kSparseGatherV2OpName) {
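The pattern throughout kernel_runtime.cc is uniform: KernelGraph is now taken by const reference instead of by raw pointer, so the per-function MS_EXCEPTION_IF_NULL(graph) guards are dropped and graph-> becomes graph. at every use. A minimal sketch of what a call site looks like after the change, assuming the caller holds a std::shared_ptr<session::KernelGraph> (the helper name LaunchGraphSketch is hypothetical):

// Hypothetical call site, for illustration only: validate the shared_ptr once,
// then hand the graph to the refactored KernelRuntime APIs by const reference.
void LaunchGraphSketch(const std::shared_ptr<session::KernelGraph> &graph_ptr, KernelRuntime *runtime) {
  MS_EXCEPTION_IF_NULL(graph_ptr);
  MS_EXCEPTION_IF_NULL(runtime);
  runtime->AssignStaticMemoryValueNode(*graph_ptr);  // callee receives a reference, no nullptr to re-check
  if (!runtime->LaunchKernels(*graph_ptr)) {
    MS_LOG(ERROR) << "LaunchKernels failed for graph " << graph_ptr->graph_id();
  }
}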

View File

@ -53,25 +53,26 @@ class KernelRuntime {
KernelRuntime() = default;
virtual ~KernelRuntime();
virtual bool Init() = 0;
virtual void AssignMemory(session::KernelGraph *graph);
void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, session::KernelGraph *graph,
virtual void AssignMemory(const session::KernelGraph &graph);
void RunOpAssignMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph &graph,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node = {});
void RunOpAssignCommunicationOutput(const AnfNodePtr &node) const;
void RunOpAssignCommunicationInput(const AnfNodePtr &node) const;
void RunOpClearMemory(const session::KernelGraph *graph) const;
void RunOpClearMemory(const session::KernelGraph &graph) const;
void RunOpMallocPre(const session::KernelGraph &graph, const std::vector<tensor::TensorPtr> &input_tensors);
#ifdef ENABLE_DEBUGGER
static bool DumpDataEnabled();
static bool DumpDataEnabledIteration();
#endif
virtual bool LoadData(session::KernelGraph *graph);
virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0;
virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
bool LaunchKernels(const session::KernelGraph *graph);
virtual void AssignStaticMemoryInput(const session::KernelGraph *graph);
virtual void AssignStaticMemoryValueNode(session::KernelGraph *graph);
virtual bool LoadData(const session::KernelGraph &graph);
virtual bool Load(const session::KernelGraph &graph, bool is_task_sink);
virtual bool Run(const session::KernelGraph &graph, bool is_task_sink) = 0;
virtual bool GenDynamicKernel(const session::KernelGraph &graph) = 0;
virtual bool RunDynamicKernelAsync(const session::KernelGraph &graph) = 0;
bool LaunchKernels(const session::KernelGraph &graph);
virtual void AssignStaticMemoryInput(const session::KernelGraph &graph);
virtual void AssignStaticMemoryValueNode(const session::KernelGraph &graph);
virtual void ClearGraphRuntimeResource(uint32_t graph_id);
virtual bool SyncStream() = 0;
virtual bool MemcpyAsync(void *dst, const void *src, uint64_t size, int32_t kind) = 0;
@ -107,13 +108,13 @@ class KernelRuntime {
virtual void PreInit() {}
#endif
virtual uint64_t GetAvailableMemMaxSize() const { return 0; }
virtual void GenKernelEvents(const session::KernelGraph *graph);
virtual void GenKernelEvents(const session::KernelGraph &graph);
virtual std::shared_ptr<DeviceEvent> CreateDeviceEvent() { return nullptr; }
virtual std::shared_ptr<DeviceEvent> CreateDeviceTimeEvent() { return nullptr; }
virtual DeviceAddressType GetTargetDeviceAddressType() const = 0;
virtual void *compute_stream() const { return nullptr; }
virtual void *communication_stream() const { return nullptr; }
void UpdateRefNodeOutputMem(const session::KernelGraph *graph);
void UpdateRefNodeOutputMem(const session::KernelGraph &graph);
virtual DeviceAddressPtr AssignExtraStaticMem(const TensorPtr &tensor, const AnfNodePtr &node, size_t index);
virtual void *GetModelStream(uint32_t graph_id) const { return nullptr; }
@ -125,8 +126,8 @@ class KernelRuntime {
virtual bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index);
virtual bool KernelMemNotReuse(const AnfNodePtr &node);
void AssignStaticMemory(session::KernelGraph *graph);
void AssignDynamicMemory(session::KernelGraph *graph);
void AssignStaticMemory(const session::KernelGraph &graph);
void AssignDynamicMemory(const session::KernelGraph &graph);
void AssignNodeOutputMem(MemType type, const AnfNodePtr &node, int index);
void AssignWorkSpaceMem(MemType type, const AnfNodePtr &node);
@ -141,35 +142,35 @@ class KernelRuntime {
virtual void KernelLaunchProfiling(const std::string &kernel_name) {}
private:
void UseMemSchedulerIfNeeded(const session::KernelGraph *graph);
bool LaunchKernel(const session::KernelGraph *graph, const AnfNodePtr &kernel,
void UseMemSchedulerIfNeeded(const session::KernelGraph &graph);
bool LaunchKernel(const session::KernelGraph &graph, const AnfNodePtr &kernel,
const std::shared_ptr<MemScheduler> &mem_scheduler, bool mock = false);
void ResetNodeAddress(session::KernelGraph *graph);
void ResetNodeAddress(const session::KernelGraph &graph);
void AssignKernelAddress(const std::shared_ptr<MemScheduler> &mem_scheduler, const AnfNodePtr &kernel,
AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces,
AddressPtrList *kernel_outputs);
static void GetOrMallocAddress(const std::shared_ptr<MemScheduler> &mem_scheduler,
const DeviceAddress *device_address, const kernel::AddressPtr &kernel_addr);
void InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph *graph);
void SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph *graph,
void InitGraphInputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph &graph);
void SyncNodeOutputTensors(const std::shared_ptr<MemScheduler> &mem_scheduler, const session::KernelGraph &graph,
const AnfNodePtr &kernel, bool mock);
void AssignStaticMemoryOutput(const session::KernelGraph *graph);
bool LaunchKernelMod(const session::KernelGraph *graph, bool mock = false);
void AssignStaticMemoryOutput(const session::KernelGraph &graph);
bool LaunchKernelMod(const session::KernelGraph &graph, bool mock = false);
void LaunchKernelEvent(const std::vector<std::vector<std::function<void()>>> &run_events, size_t index) const;
void DebugStreamSync(const CNodePtr &kernel);
static void GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList *kernel_inputs,
const std::shared_ptr<MemScheduler> &mem_schedule = nullptr);
void RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph *graph);
void RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors, const session::KernelGraph &graph);
void RunOpAssignOutputMemory(const AnfNodePtr &kernel,
const std::map<tensor::TensorPtr, session::KernelWithIndex> &tensor_to_node = {});
void RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel);
void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, session::KernelGraph *graph);
void RunOpAssignOutputNodeMemory(const ValuePtr &pre_output_value, const session::KernelGraph &graph);
void AssignValueNodeTensor(const ValueNodePtr &value_node, const ValuePtr &node_value, size_t output_idx);
DeviceAddressPtr PreAssignCNodeMemory(const AnfNodePtr &anf_node, size_t index) const;
#if ((defined ENABLE_CPU) && (!defined _WIN32))
void GetFirstPSEmbeddingCache(const session::KernelGraph *graph, AnfNodePtr *const first_cache_input_index,
void GetFirstPSEmbeddingCache(const session::KernelGraph &graph, AnfNodePtr *const first_cache_input_index,
size_t *const first_cache_size);
void CheckIfSupportPSEmbeddingCache(const session::KernelGraph *graph);
void CheckIfSupportPSEmbeddingCache(const session::KernelGraph &graph);
void CheckSparsePSEmbeddingCache(const CNodePtr &node);
#endif
void RunOpGetCommunicationInputInfo(const AnfNodePtr &node, size_t *total_size,
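Because the virtual interface in this header now takes const session::KernelGraph & everywhere, every backend runtime has to update its overrides to the new signatures in the same commit. A minimal sketch of one updated override, using a hypothetical backend class (the remaining pure virtuals are omitted, so the class stays abstract):

// Hypothetical backend runtime, shown only to illustrate the new override signature.
class ExampleDeviceRuntime : public KernelRuntime {
 public:
  bool Run(const session::KernelGraph &graph, bool is_task_sink) override {
    // Member access now uses '.', and there is no null-graph case to handle.
    for (const auto &kernel : graph.execution_order()) {
      MS_EXCEPTION_IF_NULL(kernel);
    }
    return LaunchKernels(graph);
  }
  // GenDynamicKernel, RunDynamicKernelAsync, SyncStream, MemcpyAsync, etc. are omitted from this sketch.
};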

View File

@ -35,18 +35,17 @@ size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) {
return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize;
}
void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph &graph) {
SomasPtr somas_reuse_util_ptr = std::make_shared<somas::Somas>();
MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr);
somas_reuse_util_ptr_ = somas_reuse_util_ptr;
if (!(somas_reuse_util_ptr->Allocate(graph))) {
if (!(somas_reuse_util_ptr->Allocate(&graph))) {
MS_LOG(EXCEPTION) << "Somas Allocate Failed.";
}
size_t total_allocated_size = somas_reuse_util_ptr->GetTotalMemSize();
MS_LOG(INFO) << "Graph " << graph->graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]";
MS_LOG(INFO) << "Graph " << graph.graph_id() << ": TotalSomasReuseDynamicSize [" << total_allocated_size << "]";
if (total_allocated_size > 0) {
auto base_ptr = MallocDynamicMem(total_allocated_size, false);
MS_LOG(INFO) << "Somas Reuse Memory Base Address [" << static_cast<void *>(base_ptr) << "], End Address ["
@ -59,18 +58,18 @@ void MemoryManager::MallocSomasDynamicMem(const session::KernelGraph *graph) {
#ifdef ENABLE_DUMP_IR
SubModuleId module = SubModuleId::SM_OPTIMIZER;
std::string name = "somas_allocate_info." + std::to_string(graph->graph_id());
std::string name = "somas_allocate_info." + std::to_string(graph.graph_id());
(void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasInfo());
name = "somas_mem_info." + std::to_string(graph->graph_id());
name = "somas_mem_info." + std::to_string(graph.graph_id());
(void)mindspore::RDR::RecordString(module, name, somas_reuse_util_ptr_->SomasMemory());
#endif
bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
if (save_graphs) {
std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph->graph_id()) + ".ir");
std::string file_path = GetSaveGraphsPathName("somas_allocate_info_" + std::to_string(graph.graph_id()) + ".ir");
somas_reuse_util_ptr_->DumpSomasInfoIR(file_path);
std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph->graph_id()) + ".ir");
std::string mem_file_path = GetSaveGraphsPathName("somas_mem_info_" + std::to_string(graph.graph_id()) + ".ir");
somas_reuse_util_ptr_->DumpSomasMemoryIR(mem_file_path);
}
}

View File

@ -44,7 +44,7 @@ class MemoryManager : public MemHandler {
}
virtual void ClearGlobalIdleMem() {}
virtual void MallocSomasDynamicMem(const session::KernelGraph *graph);
virtual void MallocSomasDynamicMem(const session::KernelGraph &graph);
uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, MemType type, size_t size,
const DeviceAddressPtr &address, bool comm_mem);
uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, MemType type, size_t size);

View File

@ -1231,7 +1231,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv
AbstractBasePtr InferImplTensorCopySlices(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
auto &op_name = primitive->name();
CheckArgsSize(op_name, args_spec_list, 5);
constexpr auto kTensorCopySlicesInputNum = 5;
CheckArgsSize(op_name, args_spec_list, kTensorCopySlicesInputNum);
AbstractTensorPtr input = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
return std::make_shared<AbstractTensor>(input->element(), input->shape());
}

View File

@ -493,7 +493,7 @@ AbstractBasePtr InferImplReduceScatter(const AnalysisEnginePtr &, const Primitiv
if (tmp_shape.empty()) {
MS_LOG(EXCEPTION) << "shape size is 0";
}
tmp_shape[0] = IntMulWithOverflowCheck(tmp_shape[0], rank_size);
tmp_shape[0] = LongMulWithOverflowCheck(tmp_shape[0], rank_size);
return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(tmp_shape));
}
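On the ReduceScatter change: the shape dimensions here are 64-bit integers, so the int64_t overflow-checked multiply is the appropriate helper when scaling dim 0 by rank_size. The real LongMulWithOverflowCheck is defined elsewhere in the code base; as a rough sketch of what such a check can look like, assuming GCC/Clang's __builtin_mul_overflow is available:

// Illustrative sketch only; the actual helper in the code base may differ.
#include <cstdint>

int64_t LongMulWithOverflowCheckSketch(int64_t a, int64_t b) {
  int64_t result = 0;
  if (__builtin_mul_overflow(a, b, &result)) {  // GCC/Clang builtin, detects signed overflow
    MS_LOG(EXCEPTION) << "Multiplication overflows int64_t: " << a << " * " << b;
  }
  return result;
}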

View File

@ -20,6 +20,7 @@ from ..operations import _inner_ops as inner
from .. import functional as F
from ..composite.multitype_ops.zeros_like_impl import zeros_like
@bprop_getters.register(inner.TensorCopySlices)
def get_bprop_tensor_copy_slices(self):
"""Generate bprop for TensorCopySlices"""