forked from mindspore-Ecosystem/mindspore
!28638 fix DynamicRNN data dump
Merge pull request !28638 from yuchaojie/ir_fusion2
commit 699033ee96
@@ -31,6 +31,7 @@
 #include "backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h"
 #include "utils/json_operation_utils.h"
 #include "utils/ms_context.h"
+#include "utils/utils.h"

 namespace mindspore {
 namespace kernel {
@@ -190,8 +191,8 @@ bool TbeAdapter::IsPlaceHolderInput(const AnfNodePtr &node, const OpIOInfoPtr &i
 }
 auto cnode = node->cast<CNodePtr>();
 MS_EXCEPTION_IF_NULL(cnode);
-if (AnfAlgo::HasNodeAttr("placeholder_index", cnode)) {
-auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
+if (AnfAlgo::HasNodeAttr(kAttrPlaceHolderIndex, cnode)) {
+auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
 return find(none_index.begin(), none_index.end(), input_ptr->index()) != none_index.end();
 } else {
 MS_LOG(EXCEPTION) << "Cnode: " << cnode_name << " doesn't has attribute placeholder_index.";
@@ -48,7 +48,7 @@ const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &f
 }

 std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
-auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
+auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrPlaceHolderIndex);
 size_t real_input_index = 0;
 for (size_t in_idx = 0; in_idx < input_num + none_index.size(); in_idx++) {
 auto item = find(none_index.begin(), none_index.end(), in_idx);
@@ -896,7 +896,7 @@ void Somas::NonTaskSplitProcess(const session::KernelGraph *graph) {
 auto kernel_cnodes = graph->execution_order();
 for (const auto &kernel : kernel_cnodes) {
 auto op_name = AnfAlgo::GetCNodeName(kernel);
-if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, kernel)) {
+if (AnfAlgo::IsNonTaskOp(kernel)) {
 std::vector<size_t> refnode_input_output;
 auto node = nodes_map_[kernel.get()].at(0);
 MS_EXCEPTION_IF_NULL(node);
@@ -2406,6 +2406,27 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) {
 return false;
 }

+bool AnfRuntimeAlgorithm::IsNonTaskOp(const CNodePtr &node) {
+auto op_name = GetCNodeName(node);
+return (op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, node);
+}
+
+bool AnfRuntimeAlgorithm::IsNoneInput(const AnfNodePtr &node, size_t index) {
+auto op_name = GetCNodeName(node);
+constexpr auto none_placeholder_index = 3;
+if (op_name == kDynamicRNNOpName && index == none_placeholder_index) {
+return true;
+}
+if (op_name == kDynamicGRUV2OpName) {
+auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
+auto item = std::find(none_index.begin(), none_index.end(), index);
+if (item != none_index.end()) {
+return true;
+}
+}
+return false;
+}
+
 void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) {
 MS_EXCEPTION_IF_NULL(kernel_graph);
 auto ms_context = MsContext::GetInstance();
@@ -2450,18 +2471,9 @@ void AnfRuntimeAlgorithm::CacheAddrForKernel(const AnfNodePtr &node, kernel::Ker
 auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
 size_t input_num = GetInputTensorNum(node);
 for (size_t i = 0; i < input_num; ++i) {
-auto op_name = GetCNodeName(cnode);
-constexpr auto none_placeholder_index = 3;
-if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
+if (IsNoneInput(node, i)) {
 continue;
 }
-if (op_name == kDynamicGRUV2OpName) {
-auto none_index = GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
-auto item = std::find(none_index.begin(), none_index.end(), i);
-if (item != none_index.end()) {
-continue;
-}
-}
 auto real_input = GetRealInputIndex(node, i);
 auto device_address = GetPrevNodeOutputAddr(node, real_input, skip_nop_node);
 MS_EXCEPTION_IF_NULL(device_address);
@@ -336,6 +336,10 @@ class AnfRuntimeAlgorithm {
 static bool IsControlOpExecInBackend(const AnfNodePtr &node);

 static bool IsNodeInputContainMonad(const AnfNodePtr &node);
+// Check if node is non-task op.
+static bool IsNonTaskOp(const CNodePtr &node);
+// Check if node has none input after IR fusion.
+static bool IsNoneInput(const AnfNodePtr &node, size_t index);
 // Save inputs/outputs/workspace address in kernel_mod.
 static void CacheAddrForGraph(const KernelGraphPtr &kernel_graph);
 static void CacheAddrForKernel(const AnfNodePtr &node, kernel::KernelMod *kernel_mod);
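
For orientation, here is a minimal sketch of the call pattern these two helpers enable, as applied by the hunks below: every loop over a kernel's inputs now skips inputs that IR fusion turned into None placeholders. The wrapper function name is hypothetical; the AnfAlgo calls are the ones used at the changed call sites.

// Hypothetical illustration only, not part of this commit.
void VisitRealInputs(const CNodePtr &node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(node);
  for (size_t i = 0; i < input_num; ++i) {
    if (AnfAlgo::IsNoneInput(node, i)) {
      // Skips DynamicRNN's none placeholder (input 3) and any index listed in
      // DynamicGRUV2's placeholder_index attribute.
      continue;
    }
    auto real_input = AnfAlgo::GetRealInputIndex(node, i);
    auto device_address = AnfAlgo::GetPrevNodeOutputAddr(node, real_input);
    MS_EXCEPTION_IF_NULL(device_address);
    // ... consume device_address as each call site does ...
  }
}
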
@@ -260,7 +260,11 @@ void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aic
 dump_task->set_end_graph(false);
 auto iter = runtime_info_map_.find(kernel->UniqueName());
 if (iter == runtime_info_map_.end()) {
-MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
+if (AnfAlgo::IsNonTaskOp(kernel.get())) {
+MS_LOG(INFO) << "[DataDump] kernel [" << kernel->UniqueName() << "] is a non-task node, skip dump.";
+return;
+}
+MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map, kernel name: " << kernel->UniqueName();
 }
 MS_EXCEPTION_IF_NULL(iter->second);
 auto task_id = std::get<kTupleTaskId>(*iter->second);
@@ -461,6 +465,9 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic
 auto input_size = AnfAlgo::GetInputTensorNum(kernel);
 uint64_t offset = 0;
 for (size_t i = 0; i < input_size; ++i) {
+if (AnfAlgo::IsNoneInput(kernel, i)) {
+continue;
+}
 aicpu::dump::Input input;
 auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
 auto input_node = input_node_with_index.first;
@@ -80,7 +80,9 @@ ge::Format GeTypesConvert::GetGeFormat(const std::string &format, size_t shape_s
 {kOpFormat_DHWCN, ge::Format::FORMAT_DHWCN},
 {kOpFormat_NDC1HWC0, ge::Format::FORMAT_NDC1HWC0},
 {kOpFormat_FRACTAL_Z_3D, ge::Format::FORMAT_FRACTAL_Z_3D},
-{kOpFormat_FRACTAL_ZN_LSTM, ge::Format::FORMAT_FRACTAL_ZN_LSTM}};
+{kOpFormat_FRACTAL_ZN_LSTM, ge::Format::FORMAT_FRACTAL_ZN_LSTM},
+{kOpFormat_ND_RNN_BIAS, ge::Format::FORMAT_ND_RNN_BIAS},
+{kOpFormat_FRACTAL_ZN_RNN, ge::Format::FORMAT_FRACTAL_ZN_RNN}};
 MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
 if (format == kOpFormat_DEFAULT) {
 return shape_size == k4dSize ? ge::Format::FORMAT_NCHW : ge::Format::FORMAT_ND;
@@ -17,7 +17,9 @@
 #include "runtime/device/ascend/tasksink/task_generator.h"

 #include <runtime/rt.h>
+#include "backend/session/anf_runtime_algorithm.h"
 #include "backend/kernel_compiler/task_stream.h"
+#include "utils/utils.h"
 #include "utils/ms_utils.h"
 #ifndef ENABLE_SECURITY
 #include "runtime/device/ascend/profiling/profiling_utils.h"
@@ -157,8 +159,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
 kernel_mod->set_fullname(anf_node_ptr->fullname_with_scope());
 kernel_mod->set_is_monad(AnfAlgo::IsNodeInputContainMonad(anf_node_ptr) && HasAbstractMonad(anf_node_ptr));
 auto op_name = AnfAlgo::GetCNodeName(anf_node_ptr);
-constexpr size_t kNonePlaceholderIdx = 3;
-if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, anf_node_ptr)) {
+if (AnfAlgo::IsNonTaskOp(anf_node_ptr)) {
 MS_LOG(INFO) << "Skip task generation for NonTask op " << anf_node_ptr->fullname_with_scope();
 auto debug_info = std::make_shared<TaskDebugInfo>();
 MS_EXCEPTION_IF_NULL(debug_info);
@@ -171,16 +172,9 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
 if (op_name != kAtomicAddrCleanOpName) {
 size_t input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
 for (size_t i = 0; i < input_num; ++i) {
-if (op_name == kDynamicRNNOpName && i == kNonePlaceholderIdx) {
+if (AnfAlgo::IsNoneInput(anf_node_ptr, i)) {
 continue;
 }
-if (op_name == kDynamicGRUV2OpName) {
-auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node_ptr, "placeholder_index");
-auto item = find(none_index.begin(), none_index.end(), i);
-if (item != none_index.end()) {
-continue;
-}
-}
 auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
 auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index);
 AddressPtr input = std::make_shared<Address>();
@@ -191,9 +185,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
 auto prenode_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
 MS_EXCEPTION_IF_NULL(prenode_with_index.first);
 if (AnfUtils::IsRealCNodeKernel(prenode_with_index.first)) {
-if ((AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitOpName ||
-AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitVOpName) &&
-AnfAlgo::HasNodeAttr(kAttrNonTask, prenode_with_index.first->cast<CNodePtr>())) {
+if (AnfAlgo::IsNonTaskOp(prenode_with_index.first->cast<CNodePtr>())) {
 // use memory offset to implement NonTask Type Split op
 // when op A -> split(NonTask) -> op B, op B's input addr is split's input0's addr + offset
 // offset is split's output index * split's output size
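
As a worked example of the offset rule in the comments above (all addresses and sizes below are hypothetical, chosen only for illustration): if each Split output is 4096 bytes and op B consumes Split's output 2, op B reads from Split's input0 address plus 2 * 4096 = 8192 bytes.

// Standalone arithmetic sketch of the NonTask Split offset rule; the numbers
// are made up, only the formula mirrors the comment in the hunk above.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t split_input0_addr = 0x100000;  // address of Split's input 0 (hypothetical)
  const uint64_t output_size = 4096;            // size of one Split output, in bytes (hypothetical)
  const uint64_t output_index = 2;              // op B consumes Split's output 2 (hypothetical)
  const uint64_t offset = output_index * output_size;
  const uint64_t op_b_input_addr = split_input0_addr + offset;
  std::printf("op B input addr = 0x%llx (offset %llu bytes)\n",
              static_cast<unsigned long long>(op_b_input_addr),
              static_cast<unsigned long long>(offset));
  return 0;
}
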
@@ -1123,18 +1123,9 @@ void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod
 auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
 size_t input_num = AnfAlgo::GetInputTensorNum(kernel);
 for (size_t i = 0; i < input_num; ++i) {
-auto op_name = AnfAlgo::GetCNodeName(cnode);
-constexpr auto none_placeholder_index = 3;
-if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
+if (AnfAlgo::IsNoneInput(kernel, i)) {
 continue;
 }
-if (op_name == kDynamicGRUV2OpName) {
-auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
-auto item = std::find(none_index.begin(), none_index.end(), i);
-if (item != none_index.end()) {
-continue;
-}
-}
 auto real_input = AnfAlgo::GetRealInputIndex(kernel, i);
 auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, real_input, skip_nop_node);
 MS_EXCEPTION_IF_NULL(device_address);
@@ -23,6 +23,7 @@
 #include "backend/optimizer/ascend/ascend_backend_optimization.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
 #include "utils/context/graph_kernel_flags.h"
+#include "utils/utils.h"
 #include "runtime/device/ascend/kernel_select_ascend.h"
 #include "runtime/device/kernel_adjust.h"
 #include "runtime/device/ascend/ascend_stream_assign.h"
@@ -649,7 +650,7 @@ void AscendDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr
 static const std::set<std::string> place_holder_nodes = {kDynamicRNNOpName, kDynamicGRUV2OpName};
 auto iter = place_holder_nodes.find(op_name);
 if (iter != place_holder_nodes.end()) {
-auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
+auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
 // Remove seq_length
 auto input_num = AnfAlgo::GetInputTensorNum(node);
 std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(node)};
@@ -768,19 +768,26 @@ ShapeVector DeviceShapeTransfer::NDRNNBiasDeviceShape(const ShapeVector &shape,

 ShapeVector DeviceShapeTransfer::GetAttrInputAndHiddenSize(const AnfNodePtr &node) {
 MS_EXCEPTION_IF_NULL(node);
-ShapeVector input_hidden_size = {kAlign16, kAlign16};
-if (!node->isa<CNode>()) {
+std::vector<int64_t> input_hidden_size = {kAlign16, kAlign16};
+if (!node->isa<CNode>() && !node->isa<Parameter>()) {
 return input_hidden_size;
 }
-auto cnode = node->cast<CNodePtr>();
-MS_EXCEPTION_IF_NULL(cnode);
-if (!AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) || !AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
-MS_LOG(EXCEPTION)
-<< "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
-<< cnode->DebugString();
-}
+if (node->isa<Parameter>()) {
+auto param = node->cast<ParameterPtr>();
+input_hidden_size[0] = param->input_size();
+input_hidden_size[1] = param->hidden_size();
+} else {
+CNodePtr cnode = node->cast<CNodePtr>();
+if (cnode == nullptr || !AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) ||
+!AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
+MS_LOG(EXCEPTION)
+<< "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
+<< node->DebugString();
+}
+input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrInputSize);
+input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrHiddenSize);
+}
-input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrInputSize);
-input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrHiddenSize);
 return input_hidden_size;
 }
@@ -512,6 +512,7 @@ constexpr auto kAttrFuncType = "func_type";
 constexpr auto kAttrCustAicpu = "cust_aicpu";
 constexpr auto kAttrIsInternalOutputNopNode = "is_internal_output_nop_node";
 constexpr auto kAttrIsUBFusionOp = "is_ub_fusion_op";
+constexpr auto kAttrPlaceHolderIndex = "placeholder_index";
 constexpr auto kAttrMicro = "micro";

 // custom operator func type