!42088 fix error in GraphKernel
Merge pull request !42088 from 王禹程/fix_error
This commit is contained in:
commit
3ebaecdbb1
|
@ -1,42 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/common/pass/insert_tensor_move_for_ref.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
namespace {
|
||||
// File-local constant.
// NOTE(review): kNopNodeRealInputIndex is not referenced anywhere in the visible part of this file;
// confirm whether it is still needed.
constexpr auto kNopNodeRealInputIndex = 1;
|
||||
}  // namespace
|
||||
|
||||
// Pass entry point: inserts TensorMove nodes for RefOp outputs that are graph outputs.
//
// graph: the function graph to process; checked with MS_EXCEPTION_IF_NULL.
// Returns true on every path, including the early exit taken when the memory optimize level is O0.
bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
if (context_ptr->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) == kOptimizeO0) {
|
||||
// SOMAS is not used at memory optimize level O0, so the pass is skipped.
|
||||
return true;
|
||||
}
|
||||
|
||||
// A TensorMove needs to be inserted if the output of a RefOp is a graph output.
// The value returned by the helper is intentionally discarded here.
|
||||
(void)InsertRefTensorMoveForGraphOutput(graph);
|
||||
return true;
|
||||
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
||||
#define MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "backend/common/optimizer/optimizer.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
// Pass that inserts a TensorMove when a RefNode's output is a graph output.
// Registered under the pass name "insert_tensor_move_for_graphoutput_ref_node".
|
||||
class BACKEND_EXPORT InsertTensorMoveForGraphOutputRefNode : public Pass {
|
||||
public:
|
||||
// Constructs the pass with its fixed pass name.
InsertTensorMoveForGraphOutputRefNode() : Pass("insert_tensor_move_for_graphoutput_ref_node") {}
|
||||
~InsertTensorMoveForGraphOutputRefNode() override = default;
|
||||
// Runs the pass on `graph`; see the definition in the corresponding .cc file for the return value.
bool Run(const FuncGraphPtr &graph) override;
|
||||
};
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
|
@ -222,6 +222,7 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph)
|
|||
void AscendKernelExecutor::DoSomas(const KernelGraphPtr &graph) {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
// somas
|
||||
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
|
||||
auto somas = std::make_shared<AscendSomas>();
|
||||
bool ret = somas->Assign(graph);
|
||||
|
|
|
@ -132,7 +132,6 @@
|
|||
#include "plugin/device/ascend/optimizer/format_type/deal_ref_output.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_hccl_op.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_cascade.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_pad_for_nms_with_mask.h"
|
||||
#include "plugin/device/ascend/optimizer/format_type/insert_transdata_for_runop.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_transpose_for_sort.h"
|
||||
|
@ -535,7 +534,6 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
|
|||
other_pm->AddPass(std::make_shared<BroadcastFusion>());
|
||||
other_pm->AddPass(std::make_shared<DropoutGenMaskFusion>());
|
||||
other_pm->AddPass(std::make_shared<InsertTensorMoveForCascade>());
|
||||
other_pm->AddPass(std::make_shared<InsertTensorMoveForGraphOutputRefNode>());
|
||||
other_pm->AddPass(std::make_shared<GradientsAllReduceDependLastSend>());
|
||||
other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
|
||||
other_pm->AddPass(std::make_shared<RefreshParameterFormat>());
|
||||
|
|
|
@ -123,14 +123,10 @@ void InsertTensorMoveForCascade::InsertOutputTensorMove(const FuncGraphPtr &grap
|
|||
return;
|
||||
}
|
||||
|
||||
if (!common::AnfAlgo::IsFusedCommunicationOp(hccl_node)) {
|
||||
return;
|
||||
}
|
||||
|
||||
AnfNodePtr node = nullptr;
|
||||
auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
|
||||
for (const auto &output_with_index : outputs) {
|
||||
if (!common::AnfAlgo::IsFusedCommunicationOp(output_with_index.first)) {
|
||||
if (!common::AnfAlgo::IsCommunicationOp(output_with_index.first)) {
|
||||
continue;
|
||||
}
|
||||
auto cnode = output_with_index.first->cast<CNodePtr>();
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "plugin/device/ascend/optimizer/enhancer/insert_tensor_move_for_ref.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
namespace {
|
||||
// File-local constant.
// NOTE(review): kNopNodeRealInputIndex is not referenced anywhere in the visible part of this file;
// confirm whether it is still needed.
constexpr auto kNopNodeRealInputIndex = 1;
|
||||
}  // namespace
|
||||
|
||||
// Pass entry point: inserts TensorMove nodes for RefOp outputs that are graph outputs and
// runs kernel selection on each inserted node.
//
// graph: the function graph to process; checked with MS_EXCEPTION_IF_NULL.
// Returns false when the pass is skipped (task sink disabled and memory optimize level O0);
// otherwise returns true, regardless of how many TensorMove nodes were inserted.
bool InsertTensorMoveForGraphOutputRefNode::Run(const FuncGraphPtr &graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto task_sink = ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
|
||||
auto opt_level = ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL);
|
||||
if (!task_sink && (opt_level == kOptimizeO0)) {
|
||||
// SOMAS is not used in this configuration (no task sink, optimize level O0), so the pass is skipped.
|
||||
return false;
|
||||
}
|
||||
|
||||
// A TensorMove needs to be inserted if the output of a RefOp is a graph output.
|
||||
auto tensor_move_list = InsertRefTensorMoveForGraphOutput(graph);
|
||||
// Select a kernel for every TensorMove node returned by the helper.
// NOTE(review): assumes each returned node casts to a non-null CNodePtr — confirm against the helper.
for (auto &tensor_move : tensor_move_list) {
|
||||
kernel_select_->SelectKernel(tensor_move->cast<CNodePtr>());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -1,39 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
||||
#define MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
||||
|
||||
#include <memory>
|
||||
#include "backend/common/optimizer/optimizer.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
// Pass that inserts a TensorMove when a RefNode's output is a graph output.
// Registered under the pass name "insert_tensor_move_for_graphoutput_ref_node".
// NOTE(review): another header visible in this diff declares a class with the same name in the same
// namespaces and uses the same include guard macro (MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H);
// confirm the two headers are never included in the same translation unit.
|
||||
class InsertTensorMoveForGraphOutputRefNode : public Pass {
|
||||
public:
|
||||
// Constructs the pass with its fixed pass name and a freshly created KernelSelect helper.
InsertTensorMoveForGraphOutputRefNode()
|
||||
: Pass("insert_tensor_move_for_graphoutput_ref_node"), kernel_select_(std::make_shared<KernelSelect>()) {}
|
||||
~InsertTensorMoveForGraphOutputRefNode() override = default;
|
||||
// Runs the pass on `graph`; see the definition in the corresponding .cc file for the return value.
bool Run(const FuncGraphPtr &graph) override;
|
||||
|
||||
private:
|
||||
// Kernel selector created in the constructor.
KernelSelectPtr kernel_select_;
|
||||
};
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_INSERT_TENSOR_MOVE_FOR_REF_H
|
|
@ -39,7 +39,6 @@
|
|||
#include "backend/common/pass/replace_node_by_proxy.h"
|
||||
#include "backend/common/pass/erase_visit_attr.h"
|
||||
#include "backend/common/pass/insert_tensor_move_for_communication.h"
|
||||
#include "backend/common/pass/insert_tensor_move_for_ref.h"
|
||||
#include "common/graph_kernel/adapter/graph_kernel_optimization.h"
|
||||
#include "common/graph_kernel/adapter/expander.h"
|
||||
#ifdef ENABLE_AKG
|
||||
|
@ -179,7 +178,6 @@ void CPUKernelExecutor::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
|
|||
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast"));
|
||||
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
|
||||
pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
|
||||
pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
|
||||
optimizer->AddPassManager(pm);
|
||||
(void)optimizer->Optimize(graph);
|
||||
graph->SetExecOrderByDefault();
|
||||
|
@ -348,6 +346,7 @@ void CPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
|
|||
}
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
// somas
|
||||
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
|
||||
auto somas = std::make_shared<CPUSomas>();
|
||||
bool ret = somas->Assign(kernel_graph);
|
||||
|
|
|
@ -265,6 +265,7 @@ void GPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const {
|
|||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
// somas
|
||||
if (ms_context->get_param<int>(MS_CTX_MEMORY_OPTIMIZE_LEVEL) != kOptimizeO0) {
|
||||
auto somas = std::make_shared<GPUSomas>();
|
||||
bool ret = somas->Assign(kernel_graph);
|
||||
|
@ -314,7 +315,6 @@ void GPUKernelExecutor::OptimizeGraphWithDeviceInfo(const KernelGraphPtr &graph)
|
|||
pm->AddPass(std::make_shared<opt::GetitemTuple>());
|
||||
pm->AddPass(std::make_shared<opt::ReducePrecisionFusion>("reduce_precision"));
|
||||
pm->AddPass(std::make_shared<opt::InsertTensorMoveForCommunication>());
|
||||
pm->AddPass(std::make_shared<opt::InsertTensorMoveForGraphOutputRefNode>());
|
||||
optimizer->AddPassManager(pm);
|
||||
(void)optimizer->Optimize(graph);
|
||||
graph->SetExecOrderByDefault();
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include "backend/common/optimizer/common_backend_optimization.h"
|
||||
#include "backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
|
||||
#include "backend/common/pass/insert_tensor_move_for_communication.h"
|
||||
#include "backend/common/pass/insert_tensor_move_for_ref.h"
|
||||
#include "plugin/device/gpu/optimizer/adam_weight_decay_fusion.h"
|
||||
#include "plugin/device/gpu/optimizer/adam_fusion.h"
|
||||
#include "plugin/device/gpu/optimizer/alltoall_fusion.h"
|
||||
|
|
Loading…
Reference in New Issue