!7925 fix gpu momentum fusion

Merge pull request !7925 from chenweifeng/momentum-fusion-fix

commit ea10c7a146
Author: mindspore-ci-bot
Date:   2020-10-29 10:41:15 +08:00 (committed via Gitee)
4 changed files with 51 additions and 3 deletions

File 1 of 4: ApplyMomentumScaleFusion pass implementation (.cc)

@@ -26,6 +26,28 @@
 namespace mindspore {
 namespace opt {
+bool ApplyMomentumScaleFusion::IsScalar(const BaseRef &n) {
+  if (utils::isa<AnfNodePtr>(n)) {
+    AnfNodePtr in = utils::cast<AnfNodePtr>(n);
+    MS_EXCEPTION_IF_NULL(in);
+    auto shape = in->Shape()->cast<abstract::ShapePtr>();
+    MS_EXCEPTION_IF_NULL(shape);
+    if (shape->shape().size() != 0) {
+      return false;
+    }
+    auto dtype = in->Type();
+    if (dtype->type_id() != kObjectTypeTensorType) {
+      return false;
+    }
+    auto element_type = dyn_cast<TensorType>(dtype)->element()->type_id();
+    if (element_type != kNumberTypeFloat32) {
+      return false;
+    }
+    return true;
+  }
+  return false;
+}
+
 const BaseRef ApplyMomentumScaleFusion::DefinePattern() const {
   VectorRef scale = VectorRef({prim::kPrimMul, gradient_, scale_});
   VectorRef apply_momentum =
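
Taken together, the hunk above makes the scale_ placeholder match only rank-0 float32 tensors. For readers unfamiliar with the pattern engine, the following self-contained toy sketch (hypothetical Node/CondVar stand-ins, not MindSpore's real BaseRef machinery) shows how a predicate-gated placeholder filters match candidates:

#include <functional>
#include <iostream>
#include <vector>

// Toy stand-in for a graph node's metadata (hypothetical, simplified).
struct Node {
  std::vector<int> shape;   // empty => rank-0, i.e. a scalar tensor
  bool is_float32_tensor;
};

// Mirrors the intent of the IsScalar predicate added in this commit.
bool IsScalar(const Node &n) { return n.shape.empty() && n.is_float32_tensor; }

// Toy CondVar: a pattern placeholder that matches only if its predicate holds.
struct CondVar {
  std::function<bool(const Node &)> pred;
  bool Matches(const Node &n) const { return pred(n); }
};

int main() {
  CondVar scale{IsScalar};
  Node loss_scale{{}, true};    // scalar float32 -> fusion may proceed
  Node grad{{128, 64}, true};   // 2-D tensor    -> pattern must not match
  std::cout << scale.Matches(loss_scale) << '\n';  // prints 1
  std::cout << scale.Matches(grad) << '\n';        // prints 0
}

With a plain Var, any second operand of the Mul would have matched, so the fusion could fire on non-scalar scale inputs; the predicate gate is what this fix adds.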

File 2 of 4: ApplyMomentumScaleFusion pass header (.h)

@@ -18,13 +18,14 @@
 #include <memory>
 #include "backend/optimizer/common/optimizer.h"
+#include "backend/session/anf_runtime_algorithm.h"
 
 namespace mindspore {
 namespace opt {
 class ApplyMomentumScaleFusion : public PatternProcessPass {
  public:
   explicit ApplyMomentumScaleFusion(bool multigraph = true) : PatternProcessPass("momentum_scale_fusion", multigraph) {
-    scale_ = std::make_shared<Var>();
+    scale_ = std::make_shared<CondVar>(IsScalar);
     variable_ = std::make_shared<Var>();
     accumulation_ = std::make_shared<Var>();
     learning_rate_ = std::make_shared<Var>();
@@ -36,6 +37,8 @@ class ApplyMomentumScaleFusion : public PatternProcessPass {
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
 
  private:
+  static bool IsScalar(const BaseRef &n);
   VarPtr scale_;
   VarPtr variable_;
   VarPtr accumulation_;
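
The member stays declared as VarPtr even though it now holds a CondVar; that compiles only if CondVar is a Var subtype, which the assignment above implies (an inference from this diff, not verified against the engine headers). A minimal sketch of the idiom:

#include <functional>
#include <memory>

struct BaseRef {};  // placeholder for the engine's value type (assumption)

struct Var {
  virtual ~Var() = default;
};
using VarPtr = std::shared_ptr<Var>;

// A conditional variable: a Var that additionally carries a match predicate.
struct CondVar : Var {
  explicit CondVar(std::function<bool(const BaseRef &)> p) : pred(std::move(p)) {}
  std::function<bool(const BaseRef &)> pred;
};

bool IsScalar(const BaseRef &) { return true; }  // stub predicate

int main() {
  // shared_ptr<CondVar> upcasts to shared_ptr<Var>, so the member type is unchanged.
  VarPtr scale_ = std::make_shared<CondVar>(IsScalar);
}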

File 3 of 4: ApplyMomentumWeightDecayScaleFusion pass implementation (.cc)

@@ -26,6 +26,28 @@
 namespace mindspore {
 namespace opt {
+bool ApplyMomentumWeightDecayScaleFusion::IsScalar(const BaseRef &n) {
+  if (utils::isa<AnfNodePtr>(n)) {
+    AnfNodePtr in = utils::cast<AnfNodePtr>(n);
+    MS_EXCEPTION_IF_NULL(in);
+    auto shape = in->Shape()->cast<abstract::ShapePtr>();
+    MS_EXCEPTION_IF_NULL(shape);
+    if (shape->shape().size() != 0) {
+      return false;
+    }
+    auto dtype = in->Type();
+    if (dtype->type_id() != kObjectTypeTensorType) {
+      return false;
+    }
+    auto element_type = dyn_cast<TensorType>(dtype)->element()->type_id();
+    if (element_type != kNumberTypeFloat32) {
+      return false;
+    }
+    return true;
+  }
+  return false;
+}
+
 const BaseRef ApplyMomentumWeightDecayScaleFusion::DefinePattern() const {
   VectorRef weight = VectorRef(
       {prim::kPrimAddN, VectorRef({prim::kPrimMul, variable_, weight_decay_}), VectorRef({prim::kPrimCast, gradient_})});
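
This pattern matches the weight-decay form of the gradient: an AddN of variable * weight_decay with the cast gradient; the scale multiply and the ApplyMomentum tail sit in the truncated remainder of the hunk. As a rough scalar model of the update such a fused kernel performs (assumed semantics; the placement of scale and the ApplyMomentum formula are standard but not confirmed by the visible diff):

#include <cstdio>

struct MomentumState {
  float variable;      // parameter being trained
  float accumulation;  // momentum accumulator
};

void FusedStep(MomentumState &s, float gradient, float learning_rate,
               float momentum, float weight_decay, float scale) {
  // Subgraph matched by DefinePattern: AddN(Mul(variable, weight_decay), Cast(gradient))
  float g = s.variable * weight_decay + gradient;
  g *= scale;  // the scalar scale_ input that IsScalar now enforces
  // Standard ApplyMomentum update (use_nesterov = false)
  s.accumulation = s.accumulation * momentum + g;
  s.variable -= learning_rate * s.accumulation;
}

int main() {
  MomentumState s{1.0f, 0.0f};
  FusedStep(s, /*gradient=*/0.5f, /*learning_rate=*/0.1f,
            /*momentum=*/0.9f, /*weight_decay=*/1e-4f, /*scale=*/1.0f);
  std::printf("var=%f accum=%f\n", s.variable, s.accumulation);
}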

File 4 of 4: ApplyMomentumWeightDecayScaleFusion pass header (.h)

@@ -26,7 +26,7 @@ class ApplyMomentumWeightDecayScaleFusion : public PatternProcessPass {
   explicit ApplyMomentumWeightDecayScaleFusion(bool multigraph = true)
       : PatternProcessPass("momentum_weightdecay_scale_fusion", multigraph) {
     weight_decay_ = std::make_shared<Var>();
-    scale_ = std::make_shared<Var>();
+    scale_ = std::make_shared<CondVar>(IsScalar);
     variable_ = std::make_shared<Var>();
     accumulation_ = std::make_shared<Var>();
     learning_rate_ = std::make_shared<Var>();
@@ -38,9 +38,10 @@ class ApplyMomentumWeightDecayScaleFusion : public PatternProcessPass {
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
 
  private:
+  static bool IsScalar(const BaseRef &n);
   VarPtr weight_decay_;
   VarPtr scale_;
   VarPtr variable_;
   VarPtr accumulation_;
   VarPtr learning_rate_;