Convert non-scalar tensor to parameter
Add a pass `tensor_promotion`. Fix a bug in CreateKernelInfoFromNewParameter, which reset the KernelInfo by mistake. Additionally, update akg to fix a bug in model_builder when the reduce axis is an integer.
This commit is contained in:
parent
b3b553245f
commit
b6c2812a29
2
akg
2
akg
|
@ -1 +1 @@
|
|||
Subproject commit 03ef896b90a34ebdb7eeb3fa77d7d4252d021011
|
||||
Subproject commit f308919c39811c2c3e07fb0dcc8054a533c84cbc
|
|
@ -176,6 +176,8 @@ class CompositeGraph:
|
|||
if output.shape[i] == 1 and inputs[0].shape[i] > 1:
|
||||
red_axis.append(i)
|
||||
else:
|
||||
if isinstance(a['value'], int):
|
||||
a['value'] = [a['value']]
|
||||
for i in a['value']:
|
||||
red_axis.append(i if i >= 0 else dim_size + i)
|
||||
attr['reduce_axis'] = red_axis
|
||||
|
|
|
@ -203,6 +203,7 @@ bool FuseBasicOps(const FuncGraphPtr &kernel_graph, const std::vector<AnfNodePtr
|
|||
AnfNodePtrList outputs;
|
||||
std::tie(fg, inputs, outputs) = compile::TransformSegmentToAnfGraph(fuse_nodes);
|
||||
RemoveControlDependOut(fg, &outputs, mng);
|
||||
ConvertNonscalarTensorToParameter(fg, &inputs);
|
||||
auto fuse_new_node = CreateNewFuseCNode(kernel_graph, fg, inputs, outputs, is_before_kernel_select);
|
||||
if (!is_before_kernel_select) {
|
||||
SetNewKernelInfo(fuse_new_node, fg, inputs, outputs, AnfAlgo::GetProcessor(fuse_nodes[0]));
|
||||
|
|
|
@ -263,8 +263,9 @@ bool GenJson(const AnfNodePtrList &op_nodes, const AnfNodePtrList &inputs, const
|
|||
MS_LOG(INFO) << "Collect fusion json: " << fused_name;
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
|
||||
bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(inputs_ptr);
|
||||
auto nodes = TopoSort(fg->get_return());
|
||||
|
||||
|
@ -284,7 +285,7 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
|
|||
}
|
||||
|
||||
if (vmap.empty()) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto mng = fg->manager();
|
||||
|
@ -310,11 +311,12 @@ void ConvertComplexTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inp
|
|||
|
||||
inputs.push_back(vnode);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Transform nodes(including basic and composite node) to a new graph, and collect their inputs and outputs.
|
||||
std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
|
||||
AnfNodePtrList *src_outputs = nullptr) {
|
||||
AnfNodePtrList *src_outputs) {
|
||||
FuncGraphPtr fg;
|
||||
AnfNodePtrList inputs;
|
||||
AnfNodePtrList outputs;
|
||||
|
@ -341,13 +343,12 @@ std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(
|
|||
}
|
||||
|
||||
EliminateMakeTuple(fg, mng);
|
||||
ConvertComplexTensorToParameter(fg, &inputs);
|
||||
ConvertNonscalarTensorToParameter(fg, &inputs);
|
||||
|
||||
outputs.clear();
|
||||
kernel::GetFuncGraphOutputNodes(fg, &outputs);
|
||||
return std::make_tuple(fg, inputs, outputs);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
|
||||
const AnfNodePtrList &outputs, kernel::Processor processor) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "ir/anf.h"
|
||||
|
@ -37,6 +38,9 @@ constexpr auto kJsonKeyMultiGraph = "multi_graph";
|
|||
constexpr auto kJsonKeyGraphDesc = "graph_desc";
|
||||
constexpr auto kJsonKeyGraphMode = "graph_mode";
|
||||
|
||||
bool ConvertNonscalarTensorToParameter(const FuncGraphPtr &fg, AnfNodePtrList *inputs_ptr);
|
||||
std::tuple<FuncGraphPtr, AnfNodePtrList, AnfNodePtrList> MixedNodesTransToGraph(const AnfNodePtrList &fuse_nodes,
|
||||
AnfNodePtrList *src_outputs = nullptr);
|
||||
void SetNewKernelInfo(const AnfNodePtr &new_node, const FuncGraphPtr &fg, const AnfNodePtrList &inputs,
|
||||
const AnfNodePtrList &outputs, kernel::Processor processor);
|
||||
AnfNodePtrList GetExpandOuts(const AnfNodePtrList &outs);
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "ir/func_graph.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
bool TensorPromotion::Run(const FuncGraphPtr &func_graph) {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
auto mng = func_graph->manager();
|
||||
if (mng == nullptr) {
|
||||
mng = Manage(func_graph, true);
|
||||
func_graph->set_manager(mng);
|
||||
}
|
||||
auto todos = TopoSort(func_graph->get_return());
|
||||
|
||||
bool changed = false;
|
||||
for (auto iter = todos.crbegin(); iter != todos.crend(); ++iter) {
|
||||
auto node = *iter;
|
||||
if (!AnfAlgo::IsGraphKernel(node)) {
|
||||
continue;
|
||||
}
|
||||
auto args = node->cast<CNodePtr>()->inputs();
|
||||
auto fg = GetValueNode<FuncGraphPtr>(args[kAnfPrimitiveIndex]);
|
||||
if (!ConvertNonscalarTensorToParameter(fg, &args)) {
|
||||
continue;
|
||||
}
|
||||
AnfNodePtrList inputs, outputs;
|
||||
inputs.insert(inputs.end(), args.begin() + 1, args.end());
|
||||
kernel::GetFuncGraphOutputNodes(fg, &outputs);
|
||||
auto new_cnode = CreateNewFuseCNode(func_graph, fg, inputs, outputs, false);
|
||||
SetNewKernelInfo(new_cnode, fg, inputs, outputs, AnfAlgo::GetProcessor(node));
|
||||
mng->Replace(node, new_cnode);
|
||||
changed = true;
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,33 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
||||
#include <memory>
|
||||
#include "ir/func_graph.h"
|
||||
#include "backend/optimizer/common/pass.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
class TensorPromotion : public Pass {
|
||||
public:
|
||||
TensorPromotion() : Pass("graph_kernel_tensor_promotion") {}
|
||||
~TensorPromotion() override = default;
|
||||
bool Run(const FuncGraphPtr &func_graph);
|
||||
};
|
||||
using TensorPromotionPtr = std::shared_ptr<TensorPromotion>;
|
||||
} // namespace opt
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_TENSOR_PROMOTION_H_
|
|
@ -38,6 +38,7 @@
|
|||
#include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
|
||||
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
|
||||
#include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
|
||||
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_expander.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_cse.h"
|
||||
|
@ -164,6 +165,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
|
|||
pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
|
||||
pm->AddPass(std::make_shared<opt::ArithmeticSimplify>());
|
||||
pm->AddPass(std::make_shared<opt::GraphKernelCSE>());
|
||||
pm->AddPass(std::make_shared<opt::TensorPromotion>());
|
||||
pm->AddPass(std::make_shared<opt::GraphKernelSplitter>());
|
||||
// After Simplify and Splitter, a lot of redundant getitem/maketuple
|
||||
// will be exposed, use GetitemTuple Pass to delete them.
|
||||
|
|
|
@ -395,8 +395,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
|
|||
kernel::GetValidKernelNodes(func_graph, &node_list, &input_list, &output_list);
|
||||
for (auto &anf_node : node_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_info = std::make_shared<device::KernelInfo>();
|
||||
anf_node->set_kernel_info(kernel_info);
|
||||
if (anf_node->kernel_info() == nullptr) {
|
||||
anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
}
|
||||
auto anf_cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(anf_cnode);
|
||||
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_cnode); ++i) {
|
||||
|
@ -412,8 +413,9 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
|
|||
}
|
||||
for (auto &anf_node : input_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto kernel_info = std::make_shared<device::KernelInfo>();
|
||||
anf_node->set_kernel_info(kernel_info);
|
||||
if (anf_node->kernel_info() == nullptr) {
|
||||
anf_node->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue