!42088 fix error in GraphKernel

Merge pull request !42088 from 王禹程/fix_error
This commit is contained in:
i-robot 2022-09-16 06:10:32 +00:00 committed by Gitee
commit 3ebaecdbb1
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 4 additions and 176 deletions

View File

@@ -1,42 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/common/pass/insert_tensor_move_for_ref.h"
#include "include/common/utils/anfalgo.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace opt {
/**
 * Pass entry: insert a TensorMove when the output of a Ref op is also a
 * graph output. The pass is a no-op when somas is not in use.
 *
 * @param graph The kernel graph to process; must be non-null.
 * @return Always true (the pass never reports failure).
 */
bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  if (context_ptr->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) == kOptimizeO0) {
    // Memory optimize level O0 means somas is not used, so nothing to insert.
    return true;
  }
  // Need to insert TensorMove if the output of RefOp is GraphOutput.
  // (Return value intentionally discarded; this variant does no kernel selection.)
  (void)InsertRefTensorMoveForGraphOutput(graph);
  return true;
}
}  // namespace opt
}  // namespace mindspore

View File

@@ -1,37 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_COMMON_PASS_INSERT_TENSOR_MOVE_FOR_REF_H_
#define MINDSPORE_CCSRC_BACKEND_COMMON_PASS_INSERT_TENSOR_MOVE_FOR_REF_H_
// NOTE(review): the previous guard MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H was
// identical to the one in plugin/device/ascend/optimizer/enhancer/
// insert_tensor_move_for_ref.h; including both headers in one translation unit
// would silently skip whichever came second. Renamed to a path-unique guard.
#include <memory>
#include <string>
#include <vector>
#include "backend/common/optimizer/optimizer.h"
namespace mindspore {
namespace opt {
// When RefNode's output is a GraphOutput, need insert a TensorMove.
// Backend-common variant: gated on MS_CTX_MEMORY_OPTIMIZE_LEVEL (see .cc).
class BACKEND_EXPORT InsertTensorMoveForGraphOutputRefNode : public Pass {
 public:
  InsertTensorMoveForGraphOutputRefNode() : Pass("insert_tensor_move_for_graphoutput_ref_node") {}
  ~InsertTensorMoveForGraphOutputRefNode() override = default;
  // Runs the pass on |graph|; always returns true.
  bool Run(const FuncGraphPtr &graph) override;
};
}  // namespace opt
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_COMMON_PASS_INSERT_TENSOR_MOVE_FOR_REF_H_

View File

@@ -222,6 +222,7 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph)
void AscendKernelExecutor::DoSomas(const KernelGraphPtr &graph) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
// somas
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
auto somas = std::make_shared<AscendSomas>();
bool ret = somas->Assign(graph);

View File

@@ -132,7 +132,6 @@
#include "plugin/device/ascend/optimizer/format_type/deal_ref_output.h"
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_hccl_op.h"
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_cascade.h"
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
#include "plugin/device/ascend/optimizer/enhancer/insert_pad_for_nms_with_mask.h"
#include "plugin/device/ascend/optimizer/format_type/insert_transdata_for_runop.h"
#include "plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.h"
@@ -535,7 +534,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
other_pm->AddPass(std::make_shared<BroadcastFusion>());
other_pm->AddPass(std::make_shared<DropoutGenMaskFusion>());
other_pm->AddPass(std::make_shared<InsertTensorMoveForCascade>());
other_pm->AddPass(std::make_shared<InsertTensorMoveForGraphOutputRefNode>());
other_pm->AddPass(std::make_shared<GradientsAllReduceDependLastSend>());
other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
other_pm->AddPass(std::make_shared<RefreshParameterFormat>());

View File

@@ -123,14 +123,10 @@ void InsertTensorMoveForCascade::InsertOutputTensorMove(const FuncGraphPtr &grap
return;
}
if (!common::AnfAlgo::IsFusedCommunicationOp(hccl_node)) {
return;
}
AnfNodePtr node = nullptr;
auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
for (const auto &output_with_index : outputs) {
if (!common::AnfAlgo::IsFusedCommunicationOp(output_with_index.first)) {
if (!common::AnfAlgo::IsCommunicationOp(output_with_index.first)) {
continue;
}
auto cnode = output_with_index.first->cast<CNodePtr>();

View File

@@ -1,47 +0,0 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
#include "include/common/utils/anfalgo.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace opt {
/**
 * Ascend pass entry: insert a TensorMove when the output of a Ref op is also
 * a graph output, then run kernel selection on each inserted TensorMove.
 * Skipped when somas is not in use (no task sink and memory optimize level O0).
 *
 * @param graph The kernel graph to process; must be non-null.
 * @return false when the pass is skipped, true after TensorMoves are inserted.
 */
bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  auto task_sink = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
  auto opt_level = ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL);
  if (!task_sink && (opt_level == kOptimizeO0)) {
    // Somas is not used in this configuration, so nothing to insert.
    return false;
  }
  // Need to insert TensorMove if the output of RefOp is GraphOutput.
  auto tensor_move_list = InsertRefTensorMoveForGraphOutput(graph);
  for (auto &tensor_move : tensor_move_list) {
    // Newly created TensorMove nodes have no kernel info yet; select one.
    kernel_select_->SelectKernel(tensor_move->cast<CNodePtr>());
  }
  return true;
}
}  // namespace opt
}  // namespace mindspore

View File

@@ -1,39 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_ENHANCER_INSERT_TENSOR_MOVE_FOR_REF_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_ENHANCER_INSERT_TENSOR_MOVE_FOR_REF_H_
// NOTE(review): the previous guard MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H was
// identical to the one in backend/common/pass/insert_tensor_move_for_ref.h;
// including both headers in one translation unit would silently skip whichever
// came second. Renamed to a path-unique guard.
#include <memory>
#include "backend/common/optimizer/optimizer.h"
#include "plugin/device/ascend/optimizer/ascend_helper.h"
namespace mindspore {
namespace opt {
// When RefNode's output is a GraphOutput, need insert a TensorMove.
// Ascend variant: also performs kernel selection on inserted TensorMoves (see .cc).
class InsertTensorMoveForGraphOutputRefNode : public Pass {
 public:
  InsertTensorMoveForGraphOutputRefNode()
      : Pass("insert_tensor_move_for_graphoutput_ref_node"), kernel_select_(std::make_shared<KernelSelect>()) {}
  ~InsertTensorMoveForGraphOutputRefNode() override = default;
  // Runs the pass on |graph|; returns false when skipped, true otherwise.
  bool Run(const FuncGraphPtr &graph) override;
 private:
  // Used to select kernels for the TensorMove nodes created by the pass.
  KernelSelectPtr kernel_select_;
};
}  // namespace opt
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_ENHANCER_INSERT_TENSOR_MOVE_FOR_REF_H_

View File

@@ -39,7 +39,6 @@
#include "backend/common/pass/replace_node_by_proxy.h"
#include "backend/common/pass/erase_visit_attr.h"
#include "backend/common/pass/insert_tensor_move_for_communication.h"
#include "backend/common/pass/insert_tensor_move_for_ref.h"
#include "common/graph_kernel/adapter/graph_kernel_optimization.h"
#include "common/graph_kernel/adapter/expander.h"
#ifdef ENABLE_AKG
@@ -179,7 +178,6 @@ void CPUKernelExecutor::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast"));
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(graph);
graph->SetExecOrderByDefault();
@@ -348,6 +346,7 @@ void CPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
}
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
// somas
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
auto somas = std::make_shared<CPUSomas>();
bool ret = somas->Assign(kernel_graph);

View File

@@ -265,6 +265,7 @@ void GPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
// somas
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
auto somas = std::make_shared<GPUSomas>();
bool ret = somas->Assign(kernel_graph);
@@ -314,7 +315,6 @@ void GPUKernelExecutor::OptimizeGraphWithDeviceInfo(const KernelGraphPtr &graph)
pm->AddPass(std::make_shared<opt::GetitemTuple>());
pm->AddPass(std::make_shared<opt::ReducePrecisionFusion>("reduce_precision"));
pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(graph);
graph->SetExecOrderByDefault();

View File

@@ -23,7 +23,6 @@
#include "backend/common/optimizer/common_backend_optimization.h"
#include "backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
#include "backend/common/pass/insert_tensor_move_for_communication.h"
#include "backend/common/pass/insert_tensor_move_for_ref.h"
#include "plugin/device/gpu/optimizer/adam_weight_decay_fusion.h"
#include "plugin/device/gpu/optimizer/adam_fusion.h"
#include "plugin/device/gpu/optimizer/alltoall_fusion.h"