From b63a044a651d6a77f1f63113345a22a7d0576978 Mon Sep 17 00:00:00 2001
From: yuchaojie
Date: Thu, 6 Jan 2022 14:50:38 +0800
Subject: [PATCH] add ge format convert for ND_RNN_BIAS&FRACTAL_ZN_RNN and
 filter None input and non-task op in data dumper

---
 .../kernel_compiler/tbe/tbe_adapter.cc        |  5 +--
 .../add_placeholder_for_dynamic_gru.cc        |  2 +-
 .../ccsrc/backend/optimizer/somas/somas.cc    |  2 +-
 .../backend/session/anf_runtime_algorithm.cc  | 32 +++++++++++++------
 .../backend/session/anf_runtime_algorithm.h   |  4 +++
 .../runtime/device/ascend/dump/data_dumper.cc |  9 +++++-
 .../runtime/device/ascend/ge_types_convert.cc |  4 ++-
 .../device/ascend/tasksink/task_generator.cc  | 18 +++--------
 .../ccsrc/runtime/device/kernel_runtime.cc    | 11 +------
 .../hardware/ascend/ascend_device_context.cc  |  3 +-
 .../ccsrc/utils/ms_device_shape_transfer.cc   | 27 ++++++++++------
 mindspore/ccsrc/utils/utils.h                 |  1 +
 12 files changed, 68 insertions(+), 50 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
index 6624b99b48e..7d1243c3921 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc
@@ -31,6 +31,7 @@
 #include "backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h"
 #include "utils/json_operation_utils.h"
 #include "utils/ms_context.h"
+#include "utils/utils.h"
 
 namespace mindspore {
 namespace kernel {
@@ -190,8 +191,8 @@ bool TbeAdapter::IsPlaceHolderInput(const AnfNodePtr &node, const OpIOInfoPtr &i
   }
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-  if (AnfAlgo::HasNodeAttr("placeholder_index", cnode)) {
-    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
+  if (AnfAlgo::HasNodeAttr(kAttrPlaceHolderIndex, cnode)) {
+    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
     return find(none_index.begin(), none_index.end(), input_ptr->index()) != none_index.end();
   } else {
     MS_LOG(EXCEPTION) << "Cnode: " << cnode_name << " doesn't has attribute placeholder_index.";
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
index 833e0276379..7052b56d1f4 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
@@ -48,7 +48,7 @@ const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &f
   }
   std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
-  auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
+  auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrPlaceHolderIndex);
   size_t real_input_index = 0;
   for (size_t in_idx = 0; in_idx < input_num + none_index.size(); in_idx++) {
     auto item = find(none_index.begin(), none_index.end(), in_idx);
diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas.cc b/mindspore/ccsrc/backend/optimizer/somas/somas.cc
index 3e51b28005f..dc5643c00ee 100644
--- a/mindspore/ccsrc/backend/optimizer/somas/somas.cc
+++ b/mindspore/ccsrc/backend/optimizer/somas/somas.cc
@@ -881,7 +881,7 @@ void Somas::NonTaskSplitProcess(const session::KernelGraph *graph) {
   auto kernel_cnodes = graph->execution_order();
   for (const auto &kernel : kernel_cnodes) {
     auto op_name = AnfAlgo::GetCNodeName(kernel);
-    if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, kernel)) {
+    if (AnfAlgo::IsNonTaskOp(kernel)) {
       std::vector<size_t> refnode_input_output;
       auto node = nodes_map_[kernel.get()].at(0);
       MS_EXCEPTION_IF_NULL(node);
diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
index 57fe68f92bc..aa24e4f3b4f 100644
--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
@@ -2406,6 +2406,27 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) {
   return false;
 }
 
+bool AnfRuntimeAlgorithm::IsNonTaskOp(const CNodePtr &node) {
+  auto op_name = GetCNodeName(node);
+  return (op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, node);
+}
+
+bool AnfRuntimeAlgorithm::IsNoneInput(const AnfNodePtr &node, size_t index) {
+  auto op_name = GetCNodeName(node);
+  constexpr auto none_placeholder_index = 3;
+  if (op_name == kDynamicRNNOpName && index == none_placeholder_index) {
+    return true;
+  }
+  if (op_name == kDynamicGRUV2OpName) {
+    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
+    auto item = std::find(none_index.begin(), none_index.end(), index);
+    if (item != none_index.end()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto ms_context = MsContext::GetInstance();
@@ -2450,18 +2471,9 @@ void AnfRuntimeAlgorithm::CacheAddrForKernel(const AnfNodePtr &node, kernel::Ker
   auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
   size_t input_num = GetInputTensorNum(node);
   for (size_t i = 0; i < input_num; ++i) {
-    auto op_name = GetCNodeName(cnode);
-    constexpr auto none_placeholder_index = 3;
-    if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
+    if (IsNoneInput(node, i)) {
       continue;
     }
-    if (op_name == kDynamicGRUV2OpName) {
-      auto none_index = GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
-      auto item = std::find(none_index.begin(), none_index.end(), i);
-      if (item != none_index.end()) {
-        continue;
-      }
-    }
     auto real_input = GetRealInputIndex(node, i);
     auto device_address = GetPrevNodeOutputAddr(node, real_input, skip_nop_node);
     MS_EXCEPTION_IF_NULL(device_address);
diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
index b96b5e35e08..bc051ed8f36 100644
--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
@@ -336,6 +336,10 @@ class AnfRuntimeAlgorithm {
   static bool IsControlOpExecInBackend(const AnfNodePtr &node);
 
   static bool IsNodeInputContainMonad(const AnfNodePtr &node);
+  // Check if node is non-task op.
+  static bool IsNonTaskOp(const CNodePtr &node);
+  // Check if node has none input after IR fusion.
+  static bool IsNoneInput(const AnfNodePtr &node, size_t index);
   // Save inputs/outputs/workspace address in kernel_mod.
   static void CacheAddrForGraph(const KernelGraphPtr &kernel_graph);
   static void CacheAddrForKernel(const AnfNodePtr &node, kernel::KernelMod *kernel_mod);
diff --git a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
index e5715e08370..edc826f9be8 100644
--- a/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
@@ -260,7 +260,11 @@ void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull
   dump_task->set_end_graph(false);
   auto iter = runtime_info_map_.find(kernel->UniqueName());
   if (iter == runtime_info_map_.end()) {
-    MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
+    if (AnfAlgo::IsNonTaskOp(kernel.get())) {
+      MS_LOG(INFO) << "[DataDump] kernel [" << kernel->UniqueName() << "] is a non-task node, skip dump.";
+      return;
+    }
+    MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map, kernel name: " << kernel->UniqueName();
   }
   MS_EXCEPTION_IF_NULL(iter->second);
   auto task_id = std::get<kTupleTaskId>(*iter->second);
@@ -461,6 +465,9 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull
+#include "backend/session/anf_runtime_algorithm.h"
 #include "backend/kernel_compiler/task_stream.h"
+#include "utils/utils.h"
 #include "utils/ms_utils.h"
 #ifndef ENABLE_SECURITY
 #include "runtime/device/ascend/profiling/profiling_utils.h"
 #endif
@@ -157,8 +159,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
   kernel_mod->set_fullname(anf_node_ptr->fullname_with_scope());
   kernel_mod->set_is_monad(AnfAlgo::IsNodeInputContainMonad(anf_node_ptr) && HasAbstractMonad(anf_node_ptr));
   auto op_name = AnfAlgo::GetCNodeName(anf_node_ptr);
-  constexpr size_t kNonePlaceholderIdx = 3;
-  if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, anf_node_ptr)) {
+  if (AnfAlgo::IsNonTaskOp(anf_node_ptr)) {
     MS_LOG(INFO) << "Skip task generation for NonTask op " << anf_node_ptr->fullname_with_scope();
     auto debug_info = std::make_shared<TaskDebugInfo>();
     MS_EXCEPTION_IF_NULL(debug_info);
@@ -171,16 +172,9 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
   if (op_name != kAtomicAddrCleanOpName) {
     size_t input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
     for (size_t i = 0; i < input_num; ++i) {
-      if (op_name == kDynamicRNNOpName && i == kNonePlaceholderIdx) {
+      if (AnfAlgo::IsNoneInput(anf_node_ptr, i)) {
        continue;
       }
-      if (op_name == kDynamicGRUV2OpName) {
-        auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node_ptr, "placeholder_index");
-        auto item = find(none_index.begin(), none_index.end(), i);
-        if (item != none_index.end()) {
-          continue;
-        }
-      }
       auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
       auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index);
       AddressPtr input = std::make_shared<Address>();
@@ -191,9 +185,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
       auto prenode_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
       MS_EXCEPTION_IF_NULL(prenode_with_index.first);
       if (AnfUtils::IsRealCNodeKernel(prenode_with_index.first)) {
-        if ((AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitOpName ||
-             AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitVOpName) &&
-            AnfAlgo::HasNodeAttr(kAttrNonTask, prenode_with_index.first->cast<CNodePtr>())) {
+        if (AnfAlgo::IsNonTaskOp(prenode_with_index.first->cast<CNodePtr>())) {
           // use memory offset to implement NonTask Type Split op
           // when op A -> split(NonTask) -> op B, op B's input addr is split's input0's addr + offset
           // offset is split's output index * split's output size
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index 8b73f2b913d..6f07c602fe3 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -1123,18 +1123,9 @@ void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod
   auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel);
   for (size_t i = 0; i < input_num; ++i) {
-    auto op_name = AnfAlgo::GetCNodeName(cnode);
-    constexpr auto none_placeholder_index = 3;
-    if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
+    if (AnfAlgo::IsNoneInput(kernel, i)) {
       continue;
     }
-    if (op_name == kDynamicGRUV2OpName) {
-      auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
-      auto item = std::find(none_index.begin(), none_index.end(), i);
-      if (item != none_index.end()) {
-        continue;
-      }
-    }
     auto real_input = AnfAlgo::GetRealInputIndex(kernel, i);
     auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, real_input, skip_nop_node);
     MS_EXCEPTION_IF_NULL(device_address);
diff --git a/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc b/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc
index 78d1171d4e4..7032ddffef2 100644
--- a/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc
+++ b/mindspore/ccsrc/runtime/hardware/ascend/ascend_device_context.cc
@@ -23,6 +23,7 @@
 #include "backend/optimizer/ascend/ascend_backend_optimization.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
 #include "utils/context/graph_kernel_flags.h"
+#include "utils/utils.h"
 #include "runtime/device/ascend/kernel_select_ascend.h"
 #include "runtime/device/kernel_adjust.h"
 #include "runtime/device/ascend/ascend_stream_assign.h"
@@ -646,7 +647,7 @@ void AscendDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr
   static const std::set<std::string> place_holder_nodes = {kDynamicRNNOpName, kDynamicGRUV2OpName};
   auto iter = place_holder_nodes.find(op_name);
   if (iter != place_holder_nodes.end()) {
-    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
+    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
     // Remove seq_length
     auto input_num = AnfAlgo::GetInputTensorNum(node);
     std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(node)};
diff --git a/mindspore/ccsrc/utils/ms_device_shape_transfer.cc b/mindspore/ccsrc/utils/ms_device_shape_transfer.cc
index 2aa22802ecf..85d9c3b2ad8 100644
--- a/mindspore/ccsrc/utils/ms_device_shape_transfer.cc
+++ b/mindspore/ccsrc/utils/ms_device_shape_transfer.cc
@@ -768,19 +768,26 @@ ShapeVector DeviceShapeTransfer::NDRNNBiasDeviceShape(const ShapeVector &shape,
 ShapeVector DeviceShapeTransfer::GetAttrInputAndHiddenSize(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  ShapeVector input_hidden_size = {kAlign16, kAlign16};
-  if (!node->isa<CNode>()) {
+  std::vector<int64_t> input_hidden_size = {kAlign16, kAlign16};
+  if (!node->isa<CNode>() && !node->isa<Parameter>()) {
     return input_hidden_size;
   }
-  auto cnode = node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  if (!AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) || !AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
-    MS_LOG(EXCEPTION)
-      << "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
-      << cnode->DebugString();
+
+  if (node->isa<Parameter>()) {
+    auto param = node->cast<ParameterPtr>();
+    input_hidden_size[0] = param->input_size();
+    input_hidden_size[1] = param->hidden_size();
+  } else {
+    CNodePtr cnode = node->cast<CNodePtr>();
+    if (cnode == nullptr || !AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) ||
+        !AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
+      MS_LOG(EXCEPTION)
+        << "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
+        << node->DebugString();
+    }
+    input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrInputSize);
+    input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrHiddenSize);
+  }
-  input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrInputSize);
-  input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrHiddenSize);
   return input_hidden_size;
 }
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 2be4b569d45..b741e7613b8 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -512,6 +512,7 @@ constexpr auto kAttrFuncType = "func_type";
 constexpr auto kAttrCustAicpu = "cust_aicpu";
 constexpr auto kAttrIsInternalOutputNopNode = "is_internal_output_nop_node";
 constexpr auto kAttrIsUBFusionOp = "is_ub_fusion_op";
+constexpr auto kAttrPlaceHolderIndex = "placeholder_index";
 constexpr auto kAttrMicro = "micro";
 
 // custom operator func type
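
Note (not part of the patch): a minimal, self-contained restatement of the None-input rule that the new AnfAlgo::IsNoneInput helper centralizes. The function name, the literal op-name strings, and the parameter list below are illustrative assumptions; the real helper reads the op name and the "placeholder_index" attribute directly from the node.

// Illustrative sketch only, assuming the rule as written in the patch:
// input 3 of DynamicRNN and any DynamicGRUV2 input listed in "placeholder_index"
// are None placeholders left by IR fusion and must be skipped.
#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

bool IsNoneInputIndex(const std::string &op_name, size_t index,
                      const std::vector<int64_t> &placeholder_index) {
  constexpr size_t kNonePlaceholderIdx = 3;  // fixed None slot of DynamicRNN
  if (op_name == "DynamicRNN" && index == kNonePlaceholderIdx) {
    return true;
  }
  if (op_name == "DynamicGRUV2") {
    // DynamicGRUV2 records its None inputs in the "placeholder_index" attribute.
    return std::find(placeholder_index.begin(), placeholder_index.end(),
                     static_cast<int64_t>(index)) != placeholder_index.end();
  }
  return false;
}

Centralizing this check in AnfRuntimeAlgorithm lets task generation, launch-argument collection, and the data dumper share one definition instead of three inline copies, which is the main refactoring this patch performs.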
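The NonTask Split handling in task_generator.cc relies on the addressing rule spelled out in its comment: for op A -> Split(NonTask) -> op B, op B reads directly from Split's input0 buffer at an offset of output index times output size. A minimal sketch of that arithmetic follows; the helper name is hypothetical, and the real code computes the offset inline from the kernel's device addresses.

// Sketch of the NonTask Split addressing rule from the comment above:
// op B's input address = Split's input0 address + output_index * output_size.
#include <cstddef>
#include <cstdint>

uint8_t *NonTaskSplitInputAddr(uint8_t *split_input0_addr, size_t output_index, size_t output_size) {
  return split_input0_addr + output_index * output_size;
}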