!42088 fix error in GraphKernel

Merge pull request !42088 from 王禹程/fix_error
2022-09-16 06:10:32 +00:00 · 2022-09-16 06:10:32 +00:00 · 3ebaecdbb1
parent 66ba9e952b be6ea59b3e
commit 3ebaecdbb1
10 changed files with 4 additions and 176 deletions
--- a/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_ref.cc
+++ b/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_ref.cc
@@ -1,42 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/common/pass/insert_tensor_move_for_ref.h"
-#include "include/common/utils/anfalgo.h"
-#include "utils/ms_context.h"
-
-namespace mindspore {
-namespace opt {
-namespace {
-constexpr auto kNopNodeRealInputIndex = 1;
-}
-
-bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  if (context_ptr->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) == kOptimizeO0) {
-    // not use somas
-    return true;
-  }
-
-  // Need to insert TensorMove if the output of RefOp is GraphOutput
-  (void)InsertRefTensorMoveForGraphOutput(graph);
-  return true;
-}
-}  // namespace opt
-}  // namespace mindspore
--- a/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_ref.h
+++ b/mindspore/ccsrc/backend/common/pass/insert_tensor_move_for_ref.h
@@ -1,37 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
-#define MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
-
-#include <memory>
-#include <string>
-#include <vector>
-#include "backend/common/optimizer/optimizer.h"
-
-namespace mindspore {
-namespace opt {
-// When RefNode's output is a GraphOutput, need insert a TensorMove
-class BACKEND_EXPORT InsertTensorMoveForGraphOutputRefNode : public Pass {
- public:
-  InsertTensorMoveForGraphOutputRefNode() : Pass("insert_tensor_move_for_graphoutput_ref_node") {}
-  ~InsertTensorMoveForGraphOutputRefNode() override = default;
-  bool Run(const FuncGraphPtr &graph) override;
-};
-}  // namespace opt
-}  // namespace mindspore
-
-#endif  // MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc
@@ -222,6 +222,7 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph)
 void AscendKernelExecutor::DoSomas(const KernelGraphPtr &graph) {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
+  // somas
  if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
    auto somas = std::make_shared<AscendSomas>();
    bool ret = somas->Assign(graph);
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_backend_optimization.cc
@@ -132,7 +132,6 @@
 #include "plugin/device/ascend/optimizer/format_type/deal_ref_output.h"
 #include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_hccl_op.h"
 #include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_cascade.h"
-#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
 #include "plugin/device/ascend/optimizer/enhancer/insert_pad_for_nms_with_mask.h"
 #include "plugin/device/ascend/optimizer/format_type/insert_transdata_for_runop.h"
 #include "plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.h"
@@ -535,7 +534,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
  other_pm->AddPass(std::make_shared<BroadcastFusion>());
  other_pm->AddPass(std::make_shared<DropoutGenMaskFusion>());
  other_pm->AddPass(std::make_shared<InsertTensorMoveForCascade>());
-  other_pm->AddPass(std::make_shared<InsertTensorMoveForGraphOutputRefNode>());
  other_pm->AddPass(std::make_shared<GradientsAllReduceDependLastSend>());
  other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
  other_pm->AddPass(std::make_shared<RefreshParameterFormat>());
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_cascade.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_cascade.cc
@@ -123,14 +123,10 @@ void InsertTensorMoveForCascade::InsertOutputTensorMove(const FuncGraphPtr &grap
    return;
  }

-  if (!common::AnfAlgo::IsFusedCommunicationOp(hccl_node)) {
-    return;
-  }
-
  AnfNodePtr node = nullptr;
  auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
  for (const auto &output_with_index : outputs) {
-    if (!common::AnfAlgo::IsFusedCommunicationOp(output_with_index.first)) {
+    if (!common::AnfAlgo::IsCommunicationOp(output_with_index.first)) {
      continue;
    }
    auto cnode = output_with_index.first->cast<CNodePtr>();
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.cc
@@ -1,47 +0,0 @@
-/**
- * Copyright 2022 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
-#include "include/common/utils/anfalgo.h"
-#include "utils/ms_context.h"
-
-namespace mindspore {
-namespace opt {
-namespace {
-constexpr auto kNopNodeRealInputIndex = 1;
-}
-
-bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto task_sink = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
-  auto opt_level = ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL);
-  if (!task_sink && (opt_level == kOptimizeO0)) {
-    // not use somas
-    return false;
-  }
-
-  // Need to insert TensorMove if the output of RefOp is GraphOutput
-  auto tensor_move_list = InsertRefTensorMoveForGraphOutput(graph);
-  for (auto &tensor_move : tensor_move_list) {
-    kernel_select_->SelectKernel(tensor_move->cast<CNodePtr>());
-  }
-  return true;
-}
-}  // namespace opt
-}  // namespace mindspore
--- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h
+++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h
@@ -1,39 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
-#define MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
-
-#include <memory>
-#include "backend/common/optimizer/optimizer.h"
-#include "plugin/device/ascend/optimizer/ascend_helper.h"
-
-namespace mindspore {
-namespace opt {
-// When RefNode's output is a GraphOutput, need insert a TensorMove
-class InsertTensorMoveForGraphOutputRefNode : public Pass {
- public:
-  InsertTensorMoveForGraphOutputRefNode()
-      : Pass("insert_tensor_move_for_graphoutput_ref_node"), kernel_select_(std::make_shared<KernelSelect>()) {}
-  ~InsertTensorMoveForGraphOutputRefNode() override = default;
-  bool Run(const FuncGraphPtr &graph) override;
-
- private:
-  KernelSelectPtr kernel_select_;
-};
-}  // namespace opt
-}  // namespace mindspore
-
-#endif  // MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
--- a/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc
@@ -39,7 +39,6 @@
 #include "backend/common/pass/replace_node_by_proxy.h"
 #include "backend/common/pass/erase_visit_attr.h"
 #include "backend/common/pass/insert_tensor_move_for_communication.h"
-#include "backend/common/pass/insert_tensor_move_for_ref.h"
 #include "common/graph_kernel/adapter/graph_kernel_optimization.h"
 #include "common/graph_kernel/adapter/expander.h"
 #ifdef ENABLE_AKG
@@ -179,7 +178,6 @@ void CPUKernelExecutor::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
  pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast"));
  pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
  pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
-  pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
  optimizer->AddPassManager(pm);
  (void)optimizer->Optimize(graph);
  graph->SetExecOrderByDefault();
@@ -348,6 +346,7 @@ void CPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
  }
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
+  // somas
  if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
    auto somas = std::make_shared<CPUSomas>();
    bool ret = somas->Assign(kernel_graph);
--- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc
@@ -265,6 +265,7 @@ void GPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
+  // somas
  if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
    auto somas = std::make_shared<GPUSomas>();
    bool ret = somas->Assign(kernel_graph);
@@ -314,7 +315,6 @@ void GPUKernelExecutor::OptimizeGraphWithDeviceInfo(const KernelGraphPtr &graph)
  pm->AddPass(std::make_shared<opt::GetitemTuple>());
  pm->AddPass(std::make_shared<opt::ReducePrecisionFusion>("reduce_precision"));
  pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
-  pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
  optimizer->AddPassManager(pm);
  (void)optimizer->Optimize(graph);
  graph->SetExecOrderByDefault();
--- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/optimizer.h
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/optimizer.h
@@ -23,7 +23,6 @@
 #include "backend/common/optimizer/common_backend_optimization.h"
 #include "backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
 #include "backend/common/pass/insert_tensor_move_for_communication.h"
-#include "backend/common/pass/insert_tensor_move_for_ref.h"
 #include "plugin/device/gpu/optimizer/adam_weight_decay_fusion.h"
 #include "plugin/device/gpu/optimizer/adam_fusion.h"
 #include "plugin/device/gpu/optimizer/alltoall_fusion.h"