!14648 [GraphKernel] adapt for LayerNorm C++ code

From: @wenfangpei
Reviewed-by: @coding2020,@gaoxiong1,@ckey_dou
Signed-off-by: @ckey_dou
Authored by mindspore-ci-bot on 2021-04-06 17:02:06 +08:00, committed by Gitee
commit 9aa645e202
7 changed files with 82 additions and 16 deletions

View File

@@ -472,7 +472,7 @@ class GraphSplitAscend(GraphSplitByPattern):
     def get_default_mode(self, op):
         if op.prim == "MatMul":
             return self.Area.MODE_COMPOSITE if op.inputs[0].dtype == "float16" else self.Area.MODE_BASIC
-        if op.prim in ("Tile", "BroadcastTo"):
+        if op.prim in ("Tile", "BroadcastTo", "ExpandDims"):
             return self.Area.MODE_COMPOSITE
         return self.Area.MODE_BASIC

View File

@@ -34,7 +34,9 @@
 #include "pipeline/jit/action.h"
 #include "utils/context/graph_kernel_flags.h"
 #include "vm/segment_runner.h"
-#if ENABLE_GPU
+#if ENABLE_D
+#include "runtime/device/ascend/kernel_select_ascend.h"
+#elif ENABLE_GPU
 #include "runtime/device/gpu/kernel_info_setter.h"
 #endif
@@ -620,7 +622,11 @@ bool IsBasicFuseOp(const AnfNodePtr &node) {
   std::vector<PrimitivePtr> basic_ops = GetFusibleOpList();
 #if ENABLE_D
   if (!CheckProcessor(node)) {
-    return false;
+    std::vector<PrimitivePtr> fused_aicpu_op = {prim::kPrimExpandDims, prim::kPrimReshape};
+    if (!std::any_of(fused_aicpu_op.begin(), fused_aicpu_op.end(),
+                     [&node](const PrimitivePtr &prim) { return IsPrimitiveCNode(node, prim); })) {
+      return false;
+    }
   }
 #endif
   return std::any_of(basic_ops.begin(), basic_ops.end(),
@@ -644,7 +650,9 @@ bool IsFusibleOp(const AnfNodePtr &node) {
 void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) {
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-#if ENABLE_GPU
+#if ENABLE_D
+  device::ascend::SetKernelInfo(cnode, kernel_type);
+#elif ENABLE_GPU
   device::gpu::SetKernelInfo(cnode, kernel_type);
 #endif
 }
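
The new branch in IsBasicFuseOp above lets a node that fails the Ascend processor check through anyway when its primitive is ExpandDims or Reshape, since those can still be fused as AICPU ops. Below is a minimal, self-contained sketch of the same std::any_of whitelist pattern; the plain-string "node" and the helper names are illustrative stand-ins for MindSpore's AnfNodePtr/PrimitivePtr and IsPrimitiveCNode, not part of its API.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for IsPrimitiveCNode(node, prim): here a "node" is just its op name.
bool MatchesPrim(const std::string &node_op, const std::string &prim) { return node_op == prim; }

// Mirrors the whitelist added to IsBasicFuseOp: a node that fails the processor
// check is still accepted when its primitive is in the AICPU-fusible list.
bool PassesAicpuWhitelist(const std::string &node_op) {
  const std::vector<std::string> fused_aicpu_op = {"ExpandDims", "Reshape"};
  return std::any_of(fused_aicpu_op.begin(), fused_aicpu_op.end(),
                     [&node_op](const std::string &prim) { return MatchesPrim(node_op, prim); });
}

int main() {
  std::cout << PassesAicpuWhitelist("ExpandDims") << "\n";  // 1: kept as fusible
  std::cout << PassesAicpuWhitelist("MatMul") << "\n";      // 0: rejected by the whitelist
  return 0;
}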

View File

@@ -100,17 +100,17 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt1() {
 PassManagerPtr GraphKernelOptimizer::Split() {
   auto pm = std::make_shared<PassManager>("graphkernel_stage4_split");
   // Move the non-scalar tensor (in composite node) to parameter list
   pm->AddPass(std::make_shared<TensorPromotion>());
   // Make certain nodes redundant so that they are used by only one user,
   // which can avoid unnecessary input-output and get better performance.
-  if (is_gpu) {
-    // preprocess for ShapeOpsSplitter
-    pm->AddPass(std::make_shared<ExtendOutputForUpdateState>());
-    std::vector<PrimitivePtr> duplicated_ops = {prim::kPrimReshape, prim::kPrimExpandDims, prim::kPrimCast};
-    pm->AddPass(std::make_shared<ShapeOpsSplitter>(duplicated_ops));
-  }
+  // preprocess for ShapeOpsSplitter
+  pm->AddPass(std::make_shared<ExtendOutputForUpdateState>());
+  std::vector<PrimitivePtr> duplicated_ops = {prim::kPrimReshape, prim::kPrimExpandDims, prim::kPrimCast};
+  pm->AddPass(std::make_shared<ShapeOpsSplitter>(duplicated_ops));
   // Split kernel according to costmodel
   pm->AddPass(std::make_shared<GraphKernelSplitter>());
@@ -120,11 +120,9 @@ PassManagerPtr GraphKernelOptimizer::Split() {
   pm->AddPass(std::make_shared<GetitemTuple>());
   // Eliminate the redundant node that is copied above but not handled by GraphKernelSplitter
-  if (is_gpu) {
-    pm->AddPass(std::make_shared<MergeOutputForUpdateState>());
-    pm->AddPass(std::make_shared<GraphKernelCSE>());
-    pm->AddPass(std::make_shared<EliminateRedundantOutput>());
-  }
+  pm->AddPass(std::make_shared<MergeOutputForUpdateState>());
+  pm->AddPass(std::make_shared<GraphKernelCSE>());
+  pm->AddPass(std::make_shared<EliminateRedundantOutput>());
   return pm;
 }
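
The two hunks above drop the is_gpu guard, so ExtendOutputForUpdateState, ShapeOpsSplitter, MergeOutputForUpdateState, GraphKernelCSE, and EliminateRedundantOutput now run for Ascend as well as GPU. As a rough illustration of what those AddPass calls build, here is a minimal pass-pipeline sketch; the Pass and PassManager types below are simplified stand-ins, not the real MindSpore classes, which operate on a FuncGraph.

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-in for an optimizer pass; real passes transform a FuncGraph.
struct Pass {
  explicit Pass(std::string name) : name_(std::move(name)) {}
  virtual ~Pass() = default;
  virtual void Run() { std::cout << "running " << name_ << "\n"; }
  std::string name_;
};

// Simplified stand-in for PassManager: passes run in the order they were added.
class PassManager {
 public:
  void AddPass(const std::shared_ptr<Pass> &pass) { passes_.push_back(pass); }
  void RunAll() {
    for (auto &p : passes_) {
      p->Run();
    }
  }

 private:
  std::vector<std::shared_ptr<Pass>> passes_;
};

int main() {
  PassManager pm;
  // Same ordering idea as the Split() stage: shape-op preprocessing, split, cleanup.
  pm.AddPass(std::make_shared<Pass>("ExtendOutputForUpdateState"));
  pm.AddPass(std::make_shared<Pass>("ShapeOpsSplitter"));
  pm.AddPass(std::make_shared<Pass>("GraphKernelSplitter"));
  pm.AddPass(std::make_shared<Pass>("MergeOutputForUpdateState"));
  pm.AddPass(std::make_shared<Pass>("GraphKernelCSE"));
  pm.AddPass(std::make_shared<Pass>("EliminateRedundantOutput"));
  pm.RunAll();
  return 0;
}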

View File

@@ -359,12 +359,19 @@ class Splitter {
   Splitter(const CNodePtr &main_cnode, SplitSchemerPtr split_schemer)
       : main_func_graph_(main_cnode->func_graph()), old_subgraph_cnode_(main_cnode), split_schemer_(split_schemer) {}
+  void ResetInlinedNodesKernelInfo() {
+    for (const auto &node : inlined_nodes_) {
+      ResetKernelInfo(node);
+    }
+  }
   // Maintain new subgraphs in main graph.
   void RebuildGraph(const std::vector<size_t> &cnodes_group_id) {
     BindFuncGraph();
     RecoverParameter();
     ConnectToMainGraph(cnodes_group_id);
     UpdateSubGraphInfo();
+    ResetInlinedNodesKernelInfo();
   }
   // Rebind nodes to its new sub_func_graph
@@ -420,7 +427,7 @@ class Splitter {
         }
       }
       if (AnfAlgo::IsRealKernel(node)) {
-        ResetKernelInfo(node);
+        inlined_nodes_.push_back(node);
       }
     }
   }
@@ -533,6 +540,7 @@ class Splitter {
   FuncGraphPtr main_func_graph_;
   CNodePtr old_subgraph_cnode_;                // The cnode that holds the original sub_func_graph
   std::vector<CNodePtr> new_subgraph_cnodes_;  // The cnode list that hold the new sub_func_graph
+  std::vector<AnfNodePtr> inlined_nodes_;
   SplitSchemerPtr split_schemer_;
   std::unordered_map<ParameterPtr, AnfNodePtr> param_to_main_graph_node_map_;
 };
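
In the Splitter changes above, inlined real kernels are no longer reset immediately; they are recorded in inlined_nodes_ and their kernel info is reset only after RebuildGraph() has finished wiring the new subgraphs, which is when backend kernel selection can see the final graph. A minimal sketch of this collect-then-process pattern follows; the Node alias, OnNodeInlined, and the printing ResetKernelInfo are hypothetical stand-ins for the real AnfNodePtr-based code.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins: a "node" is just a name, and resetting kernel info just prints.
using Node = std::string;
void ResetKernelInfo(const Node &node) { std::cout << "reset kernel info: " << node << "\n"; }

class Splitter {
 public:
  // While inlining, only remember real kernels instead of resetting them on the spot.
  void OnNodeInlined(const Node &node) { inlined_nodes_.push_back(node); }

  // Called at the end of RebuildGraph(), once the new graph structure is final.
  void ResetInlinedNodesKernelInfo() {
    for (const auto &node : inlined_nodes_) {
      ResetKernelInfo(node);
    }
  }

 private:
  std::vector<Node> inlined_nodes_;
};

int main() {
  Splitter splitter;
  splitter.OnNodeInlined("Add_1");
  splitter.OnNodeInlined("ExpandDims_2");
  splitter.ResetInlinedNodesKernelInfo();  // deferred until the graph has been rebuilt
  return 0;
}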

View File

@@ -54,6 +54,7 @@
 #include "debug/data_dump/dump_json_parser.h"
 #include "debug/tensor_load.h"
 #include "debug/anf_ir_utils.h"
+#include "backend/optimizer/graph_kernel/shape_ops_splitter.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
 #include "backend/session/ascend_auto_monad.h"
 #include "debug/data_dump/e2e_dump_util.h"

View File

@@ -515,6 +515,56 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
   }
   return select_status;
 }
+void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) {
+  auto kernel_info = static_cast<device::KernelInfo *>(kernel_node->kernel_info());
+  MS_EXCEPTION_IF_NULL(kernel_info);
+  auto kernel_build_info = kernel_info->select_kernel_build_info();
+  MS_EXCEPTION_IF_NULL(kernel_build_info);
+  if (AnfAlgo::IsGraphKernel(kernel_node)) {
+    return;
+  }
+  auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
+  builder->SetOriginDataFormat(kernel_build_info->GetOriginDataFormat());
+  builder->SetInputsFormat(kernel_build_info->GetAllInputFormats());
+  builder->SetInputsDeviceType(kernel_build_info->GetAllInputDeviceTypes());
+  builder->SetOutputsFormat(kernel_build_info->GetAllOutputFormats());
+  builder->SetOutputsDeviceType(kernel_build_info->GetAllOutputDeviceTypes());
+  builder->SetOpPattern(kernel_build_info->op_pattern());
+  builder->SetFusionType(kernel_build_info->fusion_type());
+  auto new_kernel_type = kernel_type;
+  auto new_processor = kernel_build_info->processor();
+  if (kernel_type == UNKNOWN_KERNEL_TYPE) {
+    std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
+    std::vector<std::shared_ptr<kernel::KernelBuildInfo>> aicpu_kernel_info_list;
+    kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type);
+    auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
+    if (select_status != kNoMatched) {
+      new_kernel_type = TBE_KERNEL;
+      new_processor = kernel::Processor::AICORE;
+      MS_LOG(INFO) << kernel_node->fullname_with_scope() << " uses TBE_KERNEL";
+    } else {
+      kernel::AICPUQuery(kernel_node, &aicpu_kernel_info_list);
+      select_status = SetMatchedKernelInfo(kernel_node, aicpu_kernel_info_list);
+      if (select_status != kNoMatched) {
+        new_kernel_type = AICPU_KERNEL;
+        new_processor = kernel::Processor::AICPU;
+        MS_LOG(INFO) << kernel_node->fullname_with_scope() << " uses AICPU_KERNEL";
+      }
+    }
+  }
+  if (new_kernel_type == UNKNOWN_KERNEL_TYPE) {
+    new_kernel_type = AKG_KERNEL;
+    new_processor = kernel::Processor::AICORE;
+    MS_LOG(INFO) << kernel_node->fullname_with_scope() << " uses AKG_KERNEL";
+  }
+  builder->SetKernelType(new_kernel_type);
+  builder->SetProcessor(new_processor);
+  AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
+}
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
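
The new ascend::SetKernelInfo above re-selects a kernel for a node whose type is unknown by trying the backends in a fixed order: a TBE (AICORE) kernel first, then an AICPU kernel, and finally AKG as the fallback, copying the existing build info into a fresh builder before committing the choice. A minimal sketch of that fallback order follows; HasTbeKernel and HasAicpuKernel are hypothetical placeholders for the real kernel::KernelQuery / kernel::AICPUQuery plus SetMatchedKernelInfo checks.

#include <iostream>
#include <string>

enum KernelType { UNKNOWN_KERNEL_TYPE, TBE_KERNEL, AICPU_KERNEL, AKG_KERNEL };

// Hypothetical stand-ins for the backend queries; the real code inspects the node
// and the registered operator info libraries.
bool HasTbeKernel(const std::string &op) { return op == "MatMul"; }
bool HasAicpuKernel(const std::string &op) { return op == "ExpandDims"; }

// Mirrors the selection order in SetKernelInfo: TBE, then AICPU, then AKG.
KernelType SelectKernelType(const std::string &op) {
  if (HasTbeKernel(op)) {
    return TBE_KERNEL;
  }
  if (HasAicpuKernel(op)) {
    return AICPU_KERNEL;
  }
  return AKG_KERNEL;
}

int main() {
  std::cout << SelectKernelType("MatMul") << "\n";      // 1: TBE_KERNEL
  std::cout << SelectKernelType("ExpandDims") << "\n";  // 2: AICPU_KERNEL
  std::cout << SelectKernelType("LayerNorm") << "\n";   // 3: AKG_KERNEL (fallback)
  return 0;
}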

View File

@@ -31,6 +31,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node,
                                     KernelType kernel_type = KernelType::UNKNOWN_KERNEL_TYPE);
 void SetTensorDeviceInfo(const CNodePtr &kernel_node);
 void SelectGraphKernelInfo(const CNodePtr &kernel_node, const FuncGraphPtr &func_graph);
+void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type);
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore