From b6c2812a295eca8ca0472385c49e84bcdfbf225a Mon Sep 17 00:00:00 2001
From: dayschan
Date: Wed, 28 Oct 2020 10:02:55 +0800
Subject: [PATCH] Convert non-scalar tensor to parameter

Add a pass `tensor_promotion`.

Fix a bug in CreateKernelInfoFromNewParameter, which reset the KernelInfo
by mistake.

In addition:
- Update akg.
- Fix a bug in model_builder when the reduce axis is an integer.
---
 akg                                             |  2 +-
 .../graph_kernel/model/model_builder.py         |  2 +
 .../graph_kernel/basic_ops_fusion.cc            |  1 +
 .../graph_kernel/graph_kernel_helper.cc         | 11 ++--
 .../graph_kernel/graph_kernel_helper.h          |  4 ++
 .../graph_kernel/tensor_promotion.cc            | 57 +++++++++++++++++++
 .../optimizer/graph_kernel/tensor_promotion.h   | 33 +++++++++++
 mindspore/ccsrc/backend/session/gpu_session.cc  |  2 +
 mindspore/ccsrc/backend/session/kernel_graph.cc | 10 ++--
 9 files changed, 112 insertions(+), 10 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc
 create mode 100644 mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h

diff --git a/akg b/akg
index 03ef896b90a..f308919c398 160000
--- a/akg
+++ b/akg
@@ -1 +1 @@
-Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011
+Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc
diff --git a/mindspore/_extends/graph_kernel/model/model_builder.py b/mindspore/_extends/graph_kernel/model/model_builder.py
index f5772b586f1..ce090e01644 100644
--- a/mindspore/_extends/graph_kernel/model/model_builder.py
+++ b/mindspore/_extends/graph_kernel/model/model_builder.py
@@ -176,6 +176,8 @@ class CompositeGraph:
                     if output.shape[i] == 1 and inputs[0].shape[i] > 1:
                         red_axis.append(i)
                 else:
+                    if isinstance(a['value'], int):
+                        a['value'] = [a['value']]
                     for i in a['value']:
                         red_axis.append(i if i >= 0 else dim_size + i)
                 attr['reduce_axis'] = red_axis
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc
index aa6b06bdd33..9b21bdc0eee 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/basic_ops_fusion.cc
@@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vectorget_return());
@@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
   }
 
   if (vmap.empty()) {
-    return;
+    return false;
   }
 
   auto mng = fg->manager();
@@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
     inputs.push_back(vnode);
   }
 
+  return true;
 }
 
 // Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs.
 std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
-                                                                                 AnfNodePtrList *src_outputs = nullptr) {
+                                                                                 AnfNodePtrList *src_outputs) {
   FuncGraphPtr fg;
   AnfNodePtrList inputs;
   AnfNodePtrList outputs;
@@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
   }
 
   EliminateMakeTuple(fg, mng);
-  ConvertComplexTensorToParameter(fg, &inputs);
+  ConvertNonscalarTensorToParameter(fg, &inputs);
 
   outputs.clear();
   kernel::GetFuncGraphOutputNodes(fg, &outputs);
   return std::make_tuple(fg, inputs, outputs);
 }
-}  // namespace
 
 void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                       const AnfNodePtrList &outputs, kernel::Processor processor) {
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
index 64a66d9e7ba..d11c1a061a6 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.h
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <tuple>
 #include
 #include
 #include "ir/anf.h"
@@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph";
 constexpr auto kJsonKeyGraphDesc = "graph_desc";
 constexpr auto kJsonKeyGraphMode = "graph_mode";
 
+bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr);
+std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
+                                                                                 AnfNodePtrList *src_outputs = nullptr);
 void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
                       const AnfNodePtrList &outputs, kernel::Processor processor);
 AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc
new file mode 100644
index 00000000000..2dada1d853f
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.cc
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/graph_kernel/tensor_promotion.h"
+#include <vector>
+#include "backend/kernel_compiler/common_utils.h"
+#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "ir/func_graph.h"
+
+namespace mindspore {
+namespace opt {
+bool TensorPromotion::Run(const FuncGraphPtr &func_graph) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  auto mng = func_graph->manager();
+  if (mng == nullptr) {
+    mng = Manage(func_graph, true);
+    func_graph->set_manager(mng);
+  }
+  auto todos = TopoSort(func_graph->get_return());
+
+  bool changed = false;
+  for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) {
+    auto node = *iter;
+    if (!AnfAlgo::IsGraphKernel(node)) {
+      continue;
+    }
+    auto args = node->cast<CNodePtr>()->inputs();
+    auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]);
+    if (!ConvertNonscalarTensorToParameter(fg, &args)) {
+      continue;
+    }
+    AnfNodePtrList inputs, outputs;
+    inputs.insert(inputs.end(), args.begin() + 1, args.end());
+    kernel::GetFuncGraphOutputNodes(fg, &outputs);
+    auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false);
+    SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node));
+    mng->Replace(node, new_cnode);
+    changed = true;
+  }
+
+  return changed;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h
new file mode 100644
index 00000000000..bac71800194
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/tensor_promotion.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
+#include <memory>
+#include "ir/func_graph.h"
+#include "backend/optimizer/common/pass.h"
+
+namespace mindspore {
+namespace opt {
+class TensorPromotion : public Pass {
+ public:
+  TensorPromotion() : Pass("graph_kernel_tensor_promotion") {}
+  ~TensorPromotion() override = default;
+  bool Run(const FuncGraphPtr &func_graph) override;
+};
+using TensorPromotionPtr = std::shared_ptr<TensorPromotion>;
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index 32d54744507..d5bdaf9d860 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -38,6 +38,7 @@
 #include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
 #include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
 #include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
+#include "backend/optimizer/graph_kernel/tensor_promotion.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_expander.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_cse.h"
@@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
   pm->AddPass(std::make_shared());
   pm->AddPass(std::make_shared());
   pm->AddPass(std::make_shared());
+  pm->AddPass(std::make_shared<opt::TensorPromotion>());
   pm->AddPass(std::make_shared());
   // After Simplify and Splitter, a lot of redundant getitem/maketuple
   // will be exposed, use GetitemTuple Pass to delete them.
diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc
index 415403f9d74..2a57707a7d7 100644
--- a/mindspore/ccsrc/backend/session/kernel_graph.cc
+++ b/mindspore/ccsrc/backend/session/kernel_graph.cc
@@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
   kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
   for (auto &anf_node : node_list) {
     MS_EXCEPTION_IF_NULL(anf_node);
-    auto kernel_info = std::make_shared<device::KernelInfo>();
-    anf_node->set_kernel_info(kernel_info);
+    if (anf_node->kernel_info() == nullptr) {
+      anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
+    }
     auto anf_cnode = anf_node->cast<CNodePtr>();
     MS_EXCEPTION_IF_NULL(anf_cnode);
     for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) {
@@ -412,8 +413,9 @@
   }
   for (auto &anf_node : input_list) {
     MS_EXCEPTION_IF_NULL(anf_node);
-    auto kernel_info = std::make_shared<device::KernelInfo>();
-    anf_node->set_kernel_info(kernel_info);
+    if (anf_node->kernel_info() == nullptr) {
+      anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
+    }
   }
 }
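
Illustration of the model_builder.py hunk above: the `axis` attribute of a reduce op may arrive either as a bare int or as a list of ints, and negative axes count back from the last dimension, so the patch wraps a bare int into a list before normalizing. A minimal standalone Python sketch of that normalization; the helper name `normalize_reduce_axis` is hypothetical and not part of the patch or of MindSpore:

    def normalize_reduce_axis(axis, dim_size):
        # Wrap a bare int into a list (the case the patch adds handling for),
        # then map negative axes to their non-negative equivalents.
        if isinstance(axis, int):
            axis = [axis]
        return [i if i >= 0 else dim_size + i for i in axis]

    # For a rank-4 tensor: a bare -1 becomes [3]; mixed signs are normalized too.
    assert normalize_reduce_axis(-1, 4) == [3]
    assert normalize_reduce_axis([0, -2], 4) == [0, 2]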