Convert non-scalar tensor to parameter
Add a pass `tensor_promotion`. Fix a bug in CreateKernelInfoFromNewParameter, which reset the KernelInfo by mistake. Additionally, update akg to fix a bug in model_builder when the reduce axis is an integer.
This commit is contained in:
parent
b3b553245f
commit
b6c2812a29
2
akg
2
akg
|
@ -1 +1 @@
|
|||
Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011
|
||||
Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc
|
|
@ -176,6 +176,8 @@ class CompositeGraph:
|
|||
if output.shape[i] == 1 and inputs[0].shape[i] > 1:
|
||||
red_axis.append(i)
|
||||
else:
|
||||
if isinstance(a['value'], int):
|
||||
a['value'] = [a['value']]
|
||||
for i in a['value']:
|
||||
red_axis.append(i if i >= 0 else dim_size + i)
|
||||
attr['reduce_axis'] = red_axis
|
||||
|
|
|
@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vector<AnfNodePtr
|
|||
AnfNodePtrList outputs;
|
||||
std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes);
|
||||
RemoveControlDependOut(fg, &outputs, mng);
|
||||
ConvertNonscalarTensorToParameter(fg, &inputs);
|
||||
auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select);
|
||||
if (!is_before_kernel_select) {
|
||||
SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0]));
|
||||
|
|
|
@ -263,8 +263,9 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const
|
|||
MS_LOG(INFO) << "Collect fusion json: " << fused_name;
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
|
||||
bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(inputs_ptr);
|
||||
auto nodes = TopoSort(fg->get_return());
|
||||
|
||||
|
@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
|
|||
}
|
||||
|
||||
if (vmap.empty()) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto mng = fg->manager();
|
||||
|
@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
|
|||
|
||||
inputs.push_back(vnode);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs.
|
||||
std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
|
||||
AnfNodePtrList *src_outputs = nullptr) {
|
||||
AnfNodePtrList *src_outputs) {
|
||||
FuncGraphPtr fg;
|
||||
AnfNodePtrList inputs;
|
||||
AnfNodePtrList outputs;
|
||||
|
@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
|
|||
}
|
||||
|
||||
EliminateMakeTuple(fg, mng);
|
||||
ConvertComplexTensorToParameter(fg, &inputs);
|
||||
ConvertNonscalarTensorToParameter(fg, &inputs);
|
||||
|
||||
outputs.clear();
|
||||
kernel::GetFuncGraphOutputNodes(fg, &outputs);
|
||||
return std::make_tuple(fg, inputs, outputs);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
|
||||
const AnfNodePtrList &outputs, kernel::Processor processor) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "ir/anf.h"
|
||||
|
@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph";
|
|||
constexpr auto kJsonKeyGraphDesc = "graph_desc";
|
||||
constexpr auto kJsonKeyGraphMode = "graph_mode";
|
||||
|
||||
bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr);
|
||||
std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
|
||||
AnfNodePtrList *src_outputs = nullptr);
|
||||
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
|
||||
const AnfNodePtrList &outputs, kernel::Processor processor);
|
||||
AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "ir/func_graph.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
bool TensorPromotion::Run(const FuncGraphPtr &func_graph) {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto mng = func_graph->manager();
|
||||
if (mng == nullptr) {
|
||||
mng = Manage(func_graph, true);
|
||||
func_graph->set_manager(mng);
|
||||
}
|
||||
auto todos = TopoSort(func_graph->get_return());
|
||||
|
||||
bool changed = false;
|
||||
for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) {
|
||||
auto node = *iter;
|
||||
if (!AnfAlgo::IsGraphKernel(node)) {
|
||||
continue;
|
||||
}
|
||||
auto args = node->cast<CNodePtr>()->inputs();
|
||||
auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]);
|
||||
if (!ConvertNonscalarTensorToParameter(fg, &args)) {
|
||||
continue;
|
||||
}
|
||||
AnfNodePtrList inputs, outputs;
|
||||
inputs.insert(inputs.end(), args.begin() + 1, args.end());
|
||||
kernel::GetFuncGraphOutputNodes(fg, &outputs);
|
||||
auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false);
|
||||
SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node));
|
||||
mng->Replace(node, new_cnode);
|
||||
changed = true;
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,33 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
||||
#include <memory>
|
||||
#include "ir/func_graph.h"
|
||||
#include "backend/optimizer/common/pass.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
class TensorPromotion : public Pass {
|
||||
public:
|
||||
TensorPromotion() : Pass("graph_kernel_tensor_promotion") {}
|
||||
~TensorPromotion() override = default;
|
||||
bool Run(const FuncGraphPtr &func_graph);
|
||||
};
|
||||
using TensorPromotionPtr = std::shared_ptr<TensorPromotion>;
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
|
@ -38,6 +38,7 @@
|
|||
#include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
|
||||
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
|
||||
#include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
|
||||
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_expander.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_cse.h"
|
||||
|
@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
|
|||
pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
|
||||
pm->AddPass(std::make_shared<opt::ArithmeticSimplify>());
|
||||
pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
|
||||
pm->AddPass(std::make_shared<opt::TensorPromotion>());
|
||||
pm->AddPass(std::make_shared<opt::GraphKernelSplitter>());
|
||||
// After Simplify and Splitter, a lot of redundant getitem/maketuple
|
||||
// will be exposed, use GetitemTuple Pass to delete them.
|
||||
|
|
|
@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
|
|||
kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
|
||||
for (auto &anf_node : node_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_info = std::make_shared<device::KernelInfo>();
|
||||
anf_node->set_kernel_info(kernel_info);
|
||||
if (anf_node->kernel_info() == nullptr) {
|
||||
anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
}
|
||||
auto anf_cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(anf_cnode);
|
||||
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) {
|
||||
|
@ -412,8 +413,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
|
|||
}
|
||||
for (auto &anf_node : input_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_info = std::make_shared<device::KernelInfo>();
|
||||
anf_node->set_kernel_info(kernel_info);
|
||||
if (anf_node->kernel_info() == nullptr) {
|
||||
anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue