!28638 fix DynamicRNN data dump

Merge pull request !28638 from yuchaojie/ir_fusion2
This commit is contained in:
i-robot 2022-01-08 09:12:04 +00:00 committed by Gitee
commit 699033ee96
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
12 changed files with 68 additions and 50 deletions

View File

@ -31,6 +31,7 @@
#include "backend/kernel_compiler/tbe/tbe_json/tbe_json_utils.h"
#include "utils/json_operation_utils.h"
#include "utils/ms_context.h"
#include "utils/utils.h"
namespace mindspore {
namespace kernel {
@ -190,8 +191,8 @@ bool TbeAdapter::IsPlaceHolderInput(const AnfNodePtr &node, const OpIOInfoPtr &i
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (AnfAlgo::HasNodeAttr("placeholder_index", cnode)) {
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
if (AnfAlgo::HasNodeAttr(kAttrPlaceHolderIndex, cnode)) {
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
return find(none_index.begin(), none_index.end(), input_ptr->index()) != none_index.end();
} else {
MS_LOG(EXCEPTION) << "Cnode: " << cnode_name << " doesn't has attribute placeholder_index.";

View File

@ -48,7 +48,7 @@ const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &f
}
std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrPlaceHolderIndex);
size_t real_input_index = 0;
for (size_t in_idx = 0; in_idx < input_num + none_index.size(); in_idx++) {
auto item = find(none_index.begin(), none_index.end(), in_idx);

View File

@ -896,7 +896,7 @@ void Somas::NonTaskSplitProcess(const session::KernelGraph *graph) {
auto kernel_cnodes = graph->execution_order();
for (const auto &kernel : kernel_cnodes) {
auto op_name = AnfAlgo::GetCNodeName(kernel);
if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, kernel)) {
if (AnfAlgo::IsNonTaskOp(kernel)) {
std::vector<size_t> refnode_input_output;
auto node = nodes_map_[kernel.get()].at(0);
MS_EXCEPTION_IF_NULL(node);

View File

@ -2406,6 +2406,27 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) {
return false;
}
// Report whether `node` is a non-task op: its op name must be kSplitOpName or kSplitVOpName and the node must
// carry the kAttrNonTask attribute. The attribute is only queried once the op name has matched.
bool AnfRuntimeAlgorithm::IsNonTaskOp(const CNodePtr &node) {
  const auto kernel_name = GetCNodeName(node);
  if (kernel_name != kSplitOpName && kernel_name != kSplitVOpName) {
    return false;
  }
  return AnfAlgo::HasNodeAttr(kAttrNonTask, node);
}
// Report whether input `index` of `node` is a none (placeholder) input.
//   - kDynamicRNNOpName: only the fixed index 3 is treated as none.
//   - kDynamicGRUV2OpName: any index listed in the node's kAttrPlaceHolderIndex attribute is treated as none.
//   - every other op: never.
bool AnfRuntimeAlgorithm::IsNoneInput(const AnfNodePtr &node, size_t index) {
  constexpr auto kDynamicRNNNoneInputIndex = 3;
  const auto kernel_name = GetCNodeName(node);
  if (kernel_name == kDynamicRNNOpName && index == kDynamicRNNNoneInputIndex) {
    return true;
  }
  if (kernel_name != kDynamicGRUV2OpName) {
    return false;
  }
  // The attribute is read only for DynamicGRUV2, exactly as before.
  const auto placeholder_indexes = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
  return std::find(placeholder_indexes.begin(), placeholder_indexes.end(), index) != placeholder_indexes.end();
}
void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto ms_context = MsContext::GetInstance();
@ -2450,18 +2471,9 @@ void AnfRuntimeAlgorithm::CacheAddrForKernel(const AnfNodePtr &node, kernel::Ker
auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
size_t input_num = GetInputTensorNum(node);
for (size_t i = 0; i < input_num; ++i) {
auto op_name = GetCNodeName(cnode);
constexpr auto none_placeholder_index = 3;
if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
if (IsNoneInput(node, i)) {
continue;
}
if (op_name == kDynamicGRUV2OpName) {
auto none_index = GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
auto item = std::find(none_index.begin(), none_index.end(), i);
if (item != none_index.end()) {
continue;
}
}
auto real_input = GetRealInputIndex(node, i);
auto device_address = GetPrevNodeOutputAddr(node, real_input, skip_nop_node);
MS_EXCEPTION_IF_NULL(device_address);

View File

@ -336,6 +336,10 @@ class AnfRuntimeAlgorithm {
static bool IsControlOpExecInBackend(const AnfNodePtr &node);
static bool IsNodeInputContainMonad(const AnfNodePtr &node);
// Check if node is non-task op.
static bool IsNonTaskOp(const CNodePtr &node);
// Check if node has none input after IR fusion.
static bool IsNoneInput(const AnfNodePtr &node, size_t index);
// Save inputs/outputs/workspace address in kernel_mod.
static void CacheAddrForGraph(const KernelGraphPtr &kernel_graph);
static void CacheAddrForKernel(const AnfNodePtr &node, kernel::KernelMod *kernel_mod);

View File

@ -260,7 +260,11 @@ void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aic
dump_task->set_end_graph(false);
auto iter = runtime_info_map_.find(kernel->UniqueName());
if (iter == runtime_info_map_.end()) {
MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
if (AnfAlgo::IsNonTaskOp(kernel.get())) {
MS_LOG(INFO) << "[DataDump] kernel [" << kernel->UniqueName() << "] is a non-task node, skip dump.";
return;
}
MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map, kernel name: " << kernel->UniqueName();
}
MS_EXCEPTION_IF_NULL(iter->second);
auto task_id = std::get<kTupleTaskId>(*iter->second);
@ -461,6 +465,9 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
uint64_t offset = 0;
for (size_t i = 0; i < input_size; ++i) {
if (AnfAlgo::IsNoneInput(kernel, i)) {
continue;
}
aicpu::dump::Input input;
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
auto input_node = input_node_with_index.first;

View File

@ -80,7 +80,9 @@ ge::Format GeTypesConvert::GetGeFormat(const std::string &format, size_t shape_s
{kOpFormat_DHWCN, ge::Format::FORMAT_DHWCN},
{kOpFormat_NDC1HWC0, ge::Format::FORMAT_NDC1HWC0},
{kOpFormat_FRACTAL_Z_3D, ge::Format::FORMAT_FRACTAL_Z_3D},
{kOpFormat_FRACTAL_ZN_LSTM, ge::Format::FORMAT_FRACTAL_ZN_LSTM}};
{kOpFormat_FRACTAL_ZN_LSTM, ge::Format::FORMAT_FRACTAL_ZN_LSTM},
{kOpFormat_ND_RNN_BIAS, ge::Format::FORMAT_ND_RNN_BIAS},
{kOpFormat_FRACTAL_ZN_RNN, ge::Format::FORMAT_FRACTAL_ZN_RNN}};
MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
if (format == kOpFormat_DEFAULT) {
return shape_size == k4dSize ? ge::Format::FORMAT_NCHW : ge::Format::FORMAT_ND;

View File

@ -17,7 +17,9 @@
#include "runtime/device/ascend/tasksink/task_generator.h"
#include <runtime/rt.h>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/task_stream.h"
#include "utils/utils.h"
#include "utils/ms_utils.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_utils.h"
@ -157,8 +159,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
kernel_mod->set_fullname(anf_node_ptr->fullname_with_scope());
kernel_mod->set_is_monad(AnfAlgo::IsNodeInputContainMonad(anf_node_ptr) && HasAbstractMonad(anf_node_ptr));
auto op_name = AnfAlgo::GetCNodeName(anf_node_ptr);
constexpr size_t kNonePlaceholderIdx = 3;
if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, anf_node_ptr)) {
if (AnfAlgo::IsNonTaskOp(anf_node_ptr)) {
MS_LOG(INFO) << "Skip task generation for NonTask op " << anf_node_ptr->fullname_with_scope();
auto debug_info = std::make_shared<TaskDebugInfo>();
MS_EXCEPTION_IF_NULL(debug_info);
@ -171,16 +172,9 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
if (op_name != kAtomicAddrCleanOpName) {
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
for (size_t i = 0; i < input_num; ++i) {
if (op_name == kDynamicRNNOpName && i == kNonePlaceholderIdx) {
if (AnfAlgo::IsNoneInput(anf_node_ptr, i)) {
continue;
}
if (op_name == kDynamicGRUV2OpName) {
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node_ptr, "placeholder_index");
auto item = find(none_index.begin(), none_index.end(), i);
if (item != none_index.end()) {
continue;
}
}
auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index);
AddressPtr input = std::make_shared<Address>();
@ -191,9 +185,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
auto prenode_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
MS_EXCEPTION_IF_NULL(prenode_with_index.first);
if (AnfUtils::IsRealCNodeKernel(prenode_with_index.first)) {
if ((AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitOpName ||
AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitVOpName) &&
AnfAlgo::HasNodeAttr(kAttrNonTask, prenode_with_index.first->cast<CNodePtr>())) {
if (AnfAlgo::IsNonTaskOp(prenode_with_index.first->cast<CNodePtr>())) {
// use memory offset to implement NonTask Type Split op
// when op A -> split(NonTask) -> op B, op B's input addr is split's input0's addr + offset
// offset is split's output index * split's output size

View File

@ -1123,18 +1123,9 @@ void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod
auto skip_nop_node = (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode);
size_t input_num = AnfAlgo::GetInputTensorNum(kernel);
for (size_t i = 0; i < input_num; ++i) {
auto op_name = AnfAlgo::GetCNodeName(cnode);
constexpr auto none_placeholder_index = 3;
if (op_name == kDynamicRNNOpName && i == none_placeholder_index) {
if (AnfAlgo::IsNoneInput(kernel, i)) {
continue;
}
if (op_name == kDynamicGRUV2OpName) {
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
auto item = std::find(none_index.begin(), none_index.end(), i);
if (item != none_index.end()) {
continue;
}
}
auto real_input = AnfAlgo::GetRealInputIndex(kernel, i);
auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, real_input, skip_nop_node);
MS_EXCEPTION_IF_NULL(device_address);

View File

@ -23,6 +23,7 @@
#include "backend/optimizer/ascend/ascend_backend_optimization.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
#include "utils/context/graph_kernel_flags.h"
#include "utils/utils.h"
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/kernel_adjust.h"
#include "runtime/device/ascend/ascend_stream_assign.h"
@ -649,7 +650,7 @@ void AscendDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr
static const std::set<std::string> place_holder_nodes = {kDynamicRNNOpName, kDynamicGRUV2OpName};
auto iter = place_holder_nodes.find(op_name);
if (iter != place_holder_nodes.end()) {
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, "placeholder_index");
auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(node, kAttrPlaceHolderIndex);
// Remove seq_length
auto input_num = AnfAlgo::GetInputTensorNum(node);
std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(node)};

View File

@ -768,19 +768,26 @@ ShapeVector DeviceShapeTransfer::NDRNNBiasDeviceShape(const ShapeVector &shape,
// Build the two-element {input_size, hidden_size} vector for `node`.
// The vector starts as {kAlign16, kAlign16}. In the visible lines, a Parameter overwrites both entries from
// param->input_size() / param->hidden_size(), while a CNode overwrites them from its kAttrInputSize /
// kAttrHiddenSize attributes and raises an exception when either attribute is missing.
// NOTE(review): this hunk appears to show pre-change and post-change lines side by side without +/- markers
// (two `input_hidden_size` declarations, two guard conditions, two pairs of attribute reads) -- confirm against
// the real file before relying on the exact control flow.
ShapeVector DeviceShapeTransfer::GetAttrInputAndHiddenSize(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
ShapeVector input_hidden_size = {kAlign16, kAlign16};
if (!node->isa<CNode>()) {
std::vector<int64_t> input_hidden_size = {kAlign16, kAlign16};
// Nodes that are neither CNode nor Parameter keep the kAlign16 defaults.
if (!node->isa<CNode>() && !node->isa<Parameter>()) {
return input_hidden_size;
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (!AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) || !AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
MS_LOG(EXCEPTION)
<< "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
<< cnode->DebugString();
// Parameter: the sizes are read from the parameter object itself.
if (node->isa<Parameter>()) {
auto param = node->cast<ParameterPtr>();
input_hidden_size[0] = param->input_size();
input_hidden_size[1] = param->hidden_size();
} else {
// CNode: both attributes must be present, otherwise an exception is logged with the node's debug string.
CNodePtr cnode = node->cast<CNodePtr>();
if (cnode == nullptr || !AnfAlgo::HasNodeAttr(kAttrHiddenSize, cnode) ||
!AnfAlgo::HasNodeAttr(kAttrInputSize, cnode)) {
MS_LOG(EXCEPTION)
<< "Node with format FRACTAL_ZN_RNN or ND_RNN_BIAS should have hidden_size or input_size attr. Node info:"
<< node->DebugString();
}
input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrInputSize);
input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrHiddenSize);
}
input_hidden_size[0] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrInputSize);
input_hidden_size[1] = AnfAlgo::GetNodeAttr<int64_t>(node, kAttrHiddenSize);
return input_hidden_size;
}

View File

@ -512,6 +512,7 @@ constexpr auto kAttrFuncType = "func_type";
constexpr auto kAttrCustAicpu = "cust_aicpu";
constexpr auto kAttrIsInternalOutputNopNode = "is_internal_output_nop_node";
constexpr auto kAttrIsUBFusionOp = "is_ub_fusion_op";
constexpr auto kAttrPlaceHolderIndex = "placeholder_index";
constexpr auto kAttrMicro = "micro";
// custom operator func type