!8981 GatherV2 pad optimizer for the dynamic shape scene

From: @yao_yf
Reviewed-by: @stsuteng,@kisnwang
Signed-off-by: @stsuteng
mindspore-ci-bot 2020-11-26 14:31:56 +08:00 committed by Gitee
commit c78683a411
12 changed files with 299 additions and 1 deletion
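This pass splits a GatherV2 whose embedding table has a dynamic first dimension and a last dimension of 1 into Pad -> GatherV2 -> Slice, since only the Pad operator can run with a dynamic shape. A minimal sketch of the rewrite (node names are illustrative), assuming a float32 table:

// Before: table has inferred shape (-1, 1)
//   out = GatherV2(table, indices, axis)
// After:
//   padded   = Pad(table)                      // paddings = ((0, 0), (0, 7)), last dim becomes 8
//   gathered = GatherV2(padded, indices, axis)
//   out      = Slice(gathered, begin = (0, ..., 0), size = <origin GatherV2 output shape>)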

View File

@@ -31,6 +31,7 @@
#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h"
#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h"
#include "backend/optimizer/ascend/ir_fission/unsorted_segment_sum_fission.h"
#include "backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h"
#include "backend/optimizer/pass/communication_op_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/square_sum_fusion.h"
#include "backend/optimizer/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h"
@@ -181,6 +182,7 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) {
ir_fusion_pm->AddPass(std::make_shared<ConcatFission>());
ir_fusion_pm->AddPass(std::make_shared<ReduceMinFission>());
ir_fusion_pm->AddPass(std::make_shared<UnsortSegmentSumFission>());
ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>());
}
} // namespace

View File

@@ -0,0 +1,177 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/ascend/ir_fission/gather_v2_ds_fission.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
namespace mindspore {
namespace opt {
namespace {
// Only the Pad operator can run with a dynamic shape.
CNodePtr CreatePad(const FuncGraphPtr &graph, const CNodePtr &origin_node, const size_t &pad_dim_size) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(origin_node);
std::vector<AnfNodePtr> pad_inputs = {NewValueNode(std::make_shared<Primitive>(kPadOpName)), origin_node->input(1)};
auto pad = graph->NewCNode(pad_inputs);
MS_EXCEPTION_IF_NULL(pad);
pad->set_scope(origin_node->scope());
auto param_abstract_shape = origin_node->input(1)->Shape();
MS_EXCEPTION_IF_NULL(param_abstract_shape);
if (!param_abstract_shape->isa<abstract::Shape>()) {
MS_LOG(EXCEPTION) << "Gatherv2 's first input has wrong shape type";
}
auto param_dyn_shape = param_abstract_shape->cast<abstract::ShapePtr>();
ShapeVector shape(param_dyn_shape->shape());
if (shape.empty()) {
MS_LOG(EXCEPTION) << "Gatherv2 's shape is empty";
}
if (shape[shape.size() - 1] == -1) {
MS_LOG(EXCEPTION) << "Dim needs pad should not be dynamic";
}
shape[shape.size() - 1] = pad_dim_size;
auto type_id = AnfAlgo::GetPrevNodeOutputInferDataType(origin_node, 0);
auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(type_id), shape);
if (param_dyn_shape->max_shape().size() == param_dyn_shape->shape().size() &&
param_dyn_shape->min_shape().size() == param_dyn_shape->shape().size()) {
ShapeVector max_shape(param_dyn_shape->max_shape());
ShapeVector min_shape(param_dyn_shape->min_shape());
ShapeVector new_shape(shape);
max_shape[max_shape.size() - 1] = pad_dim_size;
min_shape[min_shape.size() - 1] = pad_dim_size;
abstract->set_shape(std::make_shared<abstract::Shape>(new_shape, min_shape, max_shape));
}
pad->set_abstract(abstract);
std::vector<ValuePtr> elements;
for (size_t i = 0; i < shape.size() - 1; ++i) {
ShapeVector padding_vector(2);
auto padding_value = MakeValue(padding_vector);
elements.push_back(padding_value);
}
ShapeVector last_padding_vector = {0, SizeToLong(pad_dim_size - 1)};
auto last_padding_value = MakeValue(last_padding_vector);
elements.push_back(last_padding_value);
ValueTuplePtr paddings = std::make_shared<ValueTuple>(elements);
AnfAlgo::SetNodeAttr(kAttrPaddings, paddings, pad);
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), pad);
AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), pad);
AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), pad);
return pad;
}
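// A worked example of the paddings attribute built above (shapes assumed for
// illustration): for a float32 table of shape (-1, 1) and pad_dim_size == 8,
// paddings is ((0, 0), (0, 7)) -- every leading dim keeps (0, 0), and the last
// dim is right-padded by pad_dim_size - 1 == 7.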
CNodePtr CreateGatherV2Ds(const FuncGraphPtr &graph, const CNodePtr &origin_node, const CNodePtr &pad,
const size_t &pad_dim_size) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(origin_node);
MS_EXCEPTION_IF_NULL(pad);
if (origin_node->size() != 4) {
MS_LOG(EXCEPTION) << "In dynamic shape scene, gatherv2 should have 3 inputs";
}
std::vector<AnfNodePtr> gatherv2_inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimGatherV2->name())),
pad, origin_node->input(2), origin_node->input(3)};
auto gather_v2 = graph->NewCNode(gatherv2_inputs);
MS_EXCEPTION_IF_NULL(gather_v2);
gather_v2->set_scope(origin_node->scope());
auto shape = AnfAlgo::GetOutputInferShape(origin_node, 0);
shape[shape.size() - 1] = pad_dim_size;
AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(origin_node, 0)}, {shape}, gather_v2.get());
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), gather_v2);
AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), gather_v2);
auto depends_list_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(origin_node, kAttrDynamicShapeDepends);
AnfAlgo::SetNodeAttr(kAttrDynamicShapeDepends, MakeValue(depends_list_me), gather_v2);
auto input_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(origin_node, kAttrInputNames);
AnfAlgo::SetNodeAttr(kAttrInputNames, MakeValue(input_names), gather_v2);
auto output_names = AnfAlgo::GetNodeAttr<std::vector<std::string>>(origin_node, kAttrOutputNames);
AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(output_names), gather_v2);
return gather_v2;
}
CNodePtr CreateSlice(const FuncGraphPtr &graph, const CNodePtr &gather_v2, const CNodePtr &gather_v2_padding_8) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(gather_v2);
MS_EXCEPTION_IF_NULL(gather_v2_padding_8);
std::vector<AnfNodePtr> slice_inputs = {NewValueNode(std::make_shared<Primitive>(kSliceOpName)), gather_v2_padding_8};
auto slice = graph->NewCNode(slice_inputs);
MS_EXCEPTION_IF_NULL(slice);
slice->set_scope(gather_v2->scope());
slice->set_abstract(gather_v2->abstract());
auto gather_v2_shape = AnfAlgo::GetOutputInferShape(gather_v2, 0);
std::vector<size_t> offsets(gather_v2_shape.size(), 0);
AnfAlgo::SetNodeAttr(kAttrBegin, MakeValue(Convert2Long(offsets)), slice);
AnfAlgo::SetNodeAttr(kAttrSize, MakeValue(Convert2Long(gather_v2_shape)), slice);
return slice;
}
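// Note: the begin/size attrs above come from the origin GatherV2's inferred
// output shape (last dim 1), so the Slice trims the padded last dim of the new
// GatherV2's output back down to 1, restoring the original output shape.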
bool CheckInputs(const CNodePtr &origin_node) {
MS_EXCEPTION_IF_NULL(origin_node);
if (origin_node->size() != kGatherV2DynInputNum + 1) {
MS_LOG(DEBUG) << "GatherV2 in dynamic shape has wrong inputs num, not equal " << kGatherV2DynInputNum
<< ". CNode= " << origin_node->DebugString();
return false;
}
auto param_shape = AnfAlgo::GetPrevNodeOutputInferShape(origin_node, 0);
auto indice_shape = AnfAlgo::GetPrevNodeOutputInferShape(origin_node, 1);
// This optimizer only supports the case where the embedding_table input has a dynamic shape.
if (param_shape.empty() || indice_shape.empty() || AnfAlgo::IsDynamicShape(origin_node->input(2))) {
return false;
}
if (param_shape[param_shape.size() - 1] != 1) {
MS_LOG(DEBUG) << "GatherV2 in dynamic shape is not need fission. The last value of input0's shape is "
<< param_shape[param_shape.size() - 1];
return false;
}
return true;
}
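// Illustrative cases for CheckInputs (assumed shapes): an embedding_table of
// shape (-1, 1) with static indices passes the checks, while a table whose
// last dim != 1, or a GatherV2 with dynamic indices, is left to the default path.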
} // namespace
const BaseRef GatherV2DsFission::DefinePattern() const {
VarPtr Xs = std::make_shared<SeqVar>();
VectorRef pattern({prim::kPrimGatherV2, Xs});
return pattern;
}
const AnfNodePtr GatherV2DsFission::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &) const {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(node);
auto origin_node = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(origin_node);
if (!CheckInputs(origin_node)) {
return nullptr;
}
size_t pad_dim_size;
auto input_dtype = AnfAlgo::GetPrevNodeOutputInferDataType(origin_node, 0);
if (input_dtype == kNumberTypeFloat32) {
pad_dim_size = 8;
} else if (input_dtype == kNumberTypeFloat16) {
pad_dim_size = 16;
} else {
MS_LOG(DEBUG) << "GatherV2 data type not in (float32, float16), no need change";
return nullptr;
}
auto pad = CreatePad(graph, origin_node, pad_dim_size);
auto gather_v2_8 = CreateGatherV2Ds(graph, origin_node, pad, pad_dim_size);
return CreateSlice(graph, origin_node, gather_v2_8);
}
} // namespace opt
} // namespace mindspore
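The pad sizes chosen in Process (8 for float32, 16 for float16) are presumably picked so the padded last dimension fills one 32-byte block:

// float32: 8 elements * 4 bytes = 32 bytes
// float16: 16 elements * 2 bytes = 32 bytes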

View File

@@ -0,0 +1,36 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_
#include <vector>
#include <memory>
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/common/helper.h"
#include "backend/optimizer/ascend/ascend_helper.h"
namespace mindspore {
namespace opt {
class GatherV2DsFission : public PatternProcessPass {
public:
explicit GatherV2DsFission(bool multigraph = true) : PatternProcessPass("gather_v2_ds_fission", multigraph) {}
~GatherV2DsFission() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FISSION_GATHER_V2_DS_FISSION_H_

View File

@@ -98,6 +98,7 @@ constexpr size_t kTopkInputNum = 3;
constexpr size_t kLarsV2InputNum = 5;
constexpr size_t kFusedMulApplyMomentumOutputNum = 2;
constexpr size_t kSplitInputNum = 2;
constexpr size_t kGatherV2DynInputNum = 3;
constexpr size_t kUnsortedSegmentSumInputNum = 2;
enum FusedBatchNormInput {

View File

@@ -148,6 +148,7 @@ std::string GetRealOpType(const std::string &op_type) {
{"SparseApplyFtrl", "SparseApplyFtrlD"},
{"SparseApplyProximalAdagrad", "SparseApplyProximalAdagradD"},
{"SparseGatherV2", "GatherV2"},
{"Pad", "PadD"},
};
auto iter = kOpTypeMap.find(op_type);
if (iter == kOpTypeMap.end()) {

View File

@@ -323,12 +323,14 @@ constexpr auto kAttrT = "T";
constexpr auto kAttrNum = "num";
constexpr auto kAttrRankSize = "rank_size";
constexpr auto kAttrPadDimSize = "pad_dim_size";
constexpr auto kAttrPaddings = "paddings";
constexpr auto kAttrNumSegments = "num_segments";
constexpr auto kAttrBegin = "begin";
constexpr auto kAttrSize = "size";
constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";
constexpr auto kAttrInputIsDynamicShape = "input_is_dynamic_shape";
constexpr auto kAttrOutputIsDynamicShape = "output_is_dynamic_shape";
constexpr auto kAttrDynamicShapeDepends = "dynamic_shape_depends";
constexpr auto kAttrPynativeNextOpName = "next_op";
constexpr auto kAttrPynativeNextIndex = "next_index";
constexpr auto kAttrCompileInfo = "compile_info";

View File

@@ -251,7 +251,8 @@ AbstractBasePtr InferImplExpandDims(const AnalysisEnginePtr &, const PrimitivePt
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplGpuConvertToDynamicShape(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplPad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
template <typename T>
AbstractBasePtr InferTupleOrListOrDictLen(const std::string &op_name, const AbstractBasePtrList &args_spec_list) {
// Inputs: a tuple or list or dict.

View File

@@ -470,5 +470,39 @@ AbstractBasePtr InferImplSGD(const AnalysisEnginePtr &, const PrimitivePtr &prim
elements.push_back(args_spec_list[0]->Clone()->Broaden());
return std::make_shared<AbstractTuple>(elements);
}
AbstractBasePtr InferImplPad(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
MS_EXCEPTION_IF_NULL(primitive);
const std::string op_name = primitive->name();
CheckArgsSize(op_name, args_spec_list, 1);
auto arg = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
auto input_shp = arg->shape()->shape();
auto padding_attr = primitive->GetAttr("paddings");
MS_EXCEPTION_IF_NULL(padding_attr);
if (!padding_attr->isa<ValueTuple>()) {
MS_LOG(EXCEPTION) << "paddings is not a ValueTuple";
}
std::vector<ValuePtr> paddings = padding_attr->cast<ValueTuplePtr>()->value();
std::vector<std::vector<int64_t>> paddings_vec;
for (const ValuePtr &paddings_elements : paddings) {
std::vector<ValuePtr> paddings_elements_tuple = paddings_elements->cast<ValueTuplePtr>()->value();
std::vector<int64_t> paddings_vec_item;
(void)std::transform(std::begin(paddings_elements_tuple), std::end(paddings_elements_tuple),
std::back_inserter(paddings_vec_item),
[](const ValuePtr &e) -> int64_t { return GetValue<int64_t>(e); });
paddings_vec.push_back(paddings_vec_item);
}
ShapeVector result_shp;
size_t length = paddings_vec.size();
for (size_t i = 0; i < length; ++i) {
if (paddings_vec[i].size() != 2) {
MS_LOG(EXCEPTION) << "paddings 's second dim size is not 2";
}
result_shp.push_back(input_shp[i] + paddings_vec[i][0] + paddings_vec[i][1]);
}
return std::make_shared<AbstractTensor>(arg->element(), std::make_shared<Shape>(result_shp));
}
} // namespace abstract
} // namespace mindspore
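A minimal standalone sketch of the shape arithmetic InferImplPad performs (the helper name and the example shapes are assumptions for illustration):

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Computes the padded shape: out[i] = in[i] + pad_before[i] + pad_after[i].
std::vector<int64_t> PadInferShape(const std::vector<int64_t> &in,
                                   const std::vector<std::pair<int64_t, int64_t>> &paddings) {
  std::vector<int64_t> out;
  for (size_t i = 0; i < paddings.size(); ++i) {
    out.push_back(in[i] + paddings[i].first + paddings[i].second);
  }
  return out;
}

// PadInferShape({4, 1}, {{0, 0}, {0, 7}}) returns {4, 8}.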

View File

@@ -50,6 +50,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
{prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}},
{prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}},
{prim::kPrimPack, {InferImplPack, true}},
{prim::kPrimPad, {InferImplPad, true}},
{prim::kPrimUnique, {InferImplUnique, true}},
{prim::kPrimUniqueGrad, {InferImplUniqueGrad, true}},
{prim::kPrimGatherV2, {InferImplGatherV2, true}},

View File

@@ -101,6 +101,7 @@ inline const PrimitivePtr kPrimReshape = std::make_shared<Primitive>("Reshape");
inline const PrimitivePtr kPrimMapCacheIdx = std::make_shared<Primitive>("MapCacheIdx");
inline const PrimitivePtr kPrimUpdateCache = std::make_shared<Primitive>("UpdateCache");
inline const PrimitivePtr kPrimCacheSwapTable = std::make_shared<Primitive>("CacheSwapTable");
inline const PrimitivePtr kPrimSlice = std::make_shared<Primitive>("Slice");
inline const PrimitivePtr kPrimTile = std::make_shared<Primitive>("Tile");
inline const PrimitivePtr kPrimAddN = std::make_shared<Primitive>("AddN");
inline const PrimitivePtr kPrimAccumulateNV2 = std::make_shared<Primitive>("AccumulateNV2");

View File

@@ -193,6 +193,7 @@ from .sigmoid_grad import _sigmoid_grad_tbe
from .resize_nearest_neighbor import _resize_nearest_neighbor_tbe
from .resize_nearest_neighbor_grad import _resize_nearest_neighbor_grad_tbe
from .pad_d import _pad_d_tbe
from .pad_d_ds import _pad_d_ds_tbe
from .arg_max_with_value import _arg_max_with_value_tbe
from .arg_min_with_value import _arg_min_with_value_tbe
from .smooth_l1_loss import _smooth_l1_loss_tbe

View File

@@ -0,0 +1,41 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Pad op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
pad_d_op_info = TBERegOp("Pad") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("pad_d.so") \
.compute_cost(10) \
.kernel_name("pad_d") \
.partial_flag(True) \
.attr("paddings", "optional", "listListInt", "all") \
.dynamic_shape(True) \
.input(0, "x", False, "required", "all") \
.output(0, "y", False, "required", "all") \
.dtype_format(DataType.I8_Default, DataType.I8_Default) \
.dtype_format(DataType.U8_Default, DataType.U8_Default) \
.dtype_format(DataType.I32_Default, DataType.I32_Default) \
.dtype_format(DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_Default, DataType.F32_Default) \
.get_op_info()
@op_info_register(pad_d_op_info)
def _pad_d_ds_tbe():
"""Pad TBE register"""
return