forked from mindspore-Ecosystem/mindspore
clean batch buffer in once
This commit is contained in:
parent
e0082cc4ac
commit
056d7ffc56
2
akg
2
akg
|
@ -1 +1 @@
|
||||||
Subproject commit f8f4e60bf3c435cec41cbe48fe24901277ef9556
|
Subproject commit 72b359ad457ed8f4f254c8a3bd2bde88967202fb
|
|
@ -558,7 +558,7 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
|
||||||
bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes,
|
bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes,
|
||||||
const std::vector<AnfNodePtr> &input_list,
|
const std::vector<AnfNodePtr> &input_list,
|
||||||
const std::vector<AnfNodePtr> &output_list, nlohmann::json *kernel_json) {
|
const std::vector<AnfNodePtr> &output_list, nlohmann::json *kernel_json) {
|
||||||
if (anf_nodes.empty() || input_list.empty()) {
|
if (anf_nodes.empty()) {
|
||||||
MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size()
|
MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size()
|
||||||
<< "].";
|
<< "].";
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -374,13 +374,10 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
|
||||||
|
|
||||||
// Create composite op's sub-graph.
|
// Create composite op's sub-graph.
|
||||||
auto new_sub_graph = std::make_shared<FuncGraph>();
|
auto new_sub_graph = std::make_shared<FuncGraph>();
|
||||||
auto parameter = new_sub_graph->add_parameter();
|
|
||||||
parameter->set_abstract(value_node->abstract());
|
|
||||||
parameter->set_kernel_info(value_node->kernel_info_ptr());
|
|
||||||
|
|
||||||
AnfNodePtr broadcast_input_node = parameter;
|
AnfNodePtr broadcast_input_node = value_node;
|
||||||
if (dst_type == kNumberTypeFloat16) {
|
if (dst_type == kNumberTypeFloat16) {
|
||||||
AnfNodePtrList cast_inputs = {NewValueNode(prim::kPrimCast), parameter};
|
AnfNodePtrList cast_inputs = {NewValueNode(prim::kPrimCast), value_node};
|
||||||
auto cast_node_inner =
|
auto cast_node_inner =
|
||||||
CreateCNode(cast_inputs, new_sub_graph, {.format = format, .shape = {1}, .type = TypeIdToType(dst_type)});
|
CreateCNode(cast_inputs, new_sub_graph, {.format = format, .shape = {1}, .type = TypeIdToType(dst_type)});
|
||||||
AnfAlgo::SetNodeAttr("dst_type", MakeValue("float32"), cast_node_inner);
|
AnfAlgo::SetNodeAttr("dst_type", MakeValue("float32"), cast_node_inner);
|
||||||
|
@ -400,12 +397,13 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
|
||||||
|
|
||||||
// Makeup sub-graph.
|
// Makeup sub-graph.
|
||||||
new_sub_graph->set_output(broadcast_to_node_inner);
|
new_sub_graph->set_output(broadcast_to_node_inner);
|
||||||
auto broadcast_to_composite_node = main_graph->NewCNode({NewValueNode(new_sub_graph), value_node});
|
auto broadcast_to_composite_node = main_graph->NewCNode({NewValueNode(new_sub_graph)});
|
||||||
broadcast_to_composite_node->set_abstract(broadcast_to_node_inner->abstract());
|
broadcast_to_composite_node->set_abstract(broadcast_to_node_inner->abstract());
|
||||||
SetNewKernelInfo(broadcast_to_composite_node, new_sub_graph, {value_node}, {broadcast_to_node_inner},
|
SetNewKernelInfo(broadcast_to_composite_node, new_sub_graph, {}, {broadcast_to_node_inner},
|
||||||
kernel::Processor::CUDA);
|
AnfAlgo::GetProcessor(atomic_add_node_));
|
||||||
auto graph_attr = ExtractGraphKernelName(TopoSort(new_sub_graph->get_return()), "", "atomic_clean");
|
auto graph_attr = ExtractGraphKernelName(TopoSort(new_sub_graph->get_return()), "", "atomic_clean");
|
||||||
new_sub_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(graph_attr));
|
new_sub_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(graph_attr));
|
||||||
|
new_sub_graph->set_attr("composite_type", MakeValue("atomic_clean"));
|
||||||
|
|
||||||
return broadcast_to_composite_node;
|
return broadcast_to_composite_node;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "backend/optimizer/graph_kernel/clean_all_in_once.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include "backend/session/anf_runtime_algorithm.h"
|
||||||
|
#include "backend/kernel_compiler/common_utils.h"
|
||||||
|
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace opt {
|
||||||
|
namespace {
|
||||||
|
// Returns the shape of `node` (as reported by GetShape) with every leading
// dimension equal to 1 dropped. Dimensions equal to 1 that appear after the
// first non-1 dimension are kept.
ShapeVector GetValidShape(const AnfNodePtr &node) {
  auto full_shape = GetShape(node);

  // Step over the run of 1s at the head of the shape.
  auto dim_iter = full_shape.begin();
  while (dim_iter != full_shape.end() && *dim_iter == 1) {
    ++dim_iter;
  }

  // Everything from the first non-1 dimension onwards is copied verbatim.
  ShapeVector trimmed_shape;
  for (; dim_iter != full_shape.end(); ++dim_iter) {
    trimmed_shape.push_back(*dim_iter);
  }
  return trimmed_shape;
}
|
||||||
|
|
||||||
|
bool IsAtomicCleanNode(const AnfNodePtr &node) {
|
||||||
|
MS_EXCEPTION_IF_NULL(node);
|
||||||
|
auto cnode = node->cast<CNodePtr>();
|
||||||
|
MS_EXCEPTION_IF_NULL(cnode);
|
||||||
|
auto func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kAnfPrimitiveIndex));
|
||||||
|
MS_EXCEPTION_IF_NULL(func_graph);
|
||||||
|
if (!func_graph->has_attr("composite_type")) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ctype_value = func_graph->get_attr("composite_type");
|
||||||
|
if (!ctype_value->isa<StringImm>()) {
|
||||||
|
MS_LOG(EXCEPTION) << "Attribute composite_type should be a string!";
|
||||||
|
}
|
||||||
|
auto ctype = GetValue<std::string>(ctype_value);
|
||||||
|
return ctype == "atomic_clean";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<AnfNodePtrList> SplitVectorByWidth(const AnfNodePtrList &nodes, int width) {
|
||||||
|
std::vector<AnfNodePtrList> splitted_nodes;
|
||||||
|
if (!nodes.empty()) {
|
||||||
|
return splitted_nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num = (nodes.size() - 1) / width + 1;
|
||||||
|
splitted_nodes.resize(num);
|
||||||
|
for (size_t i = 0; i < nodes.size(); ++i) {
|
||||||
|
splitted_nodes[i / width].push_back(nodes[i]);
|
||||||
|
}
|
||||||
|
return splitted_nodes;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
bool CleanAllInOnce::Run(const FuncGraphPtr &func_graph) {
|
||||||
|
MS_EXCEPTION_IF_NULL(func_graph);
|
||||||
|
auto mng = func_graph->manager();
|
||||||
|
if (mng == nullptr) {
|
||||||
|
mng = Manage(func_graph, true);
|
||||||
|
func_graph->set_manager(mng);
|
||||||
|
}
|
||||||
|
auto todos = TopoSort(func_graph->get_return());
|
||||||
|
std::map<ShapeVector, AnfNodePtrList> clean_map;
|
||||||
|
std::for_each(todos.cbegin(), todos.cend(), [&clean_map](const AnfNodePtr &node) {
|
||||||
|
if (AnfAlgo::IsGraphKernel(node) && IsAtomicCleanNode(node)) {
|
||||||
|
auto valid_shape = GetValidShape(node);
|
||||||
|
auto iter = clean_map.find(valid_shape);
|
||||||
|
if (iter != clean_map.end()) {
|
||||||
|
iter->second.push_back(node);
|
||||||
|
} else {
|
||||||
|
clean_map.insert({valid_shape, {node}});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
bool changed = false;
|
||||||
|
if (!clean_map.empty()) {
|
||||||
|
for (auto iter : clean_map) {
|
||||||
|
// Do all in once is not good, so do ten in once.
|
||||||
|
auto splitted_nodes = SplitVectorByWidth(iter.second, 10);
|
||||||
|
for (auto &snodes : splitted_nodes) {
|
||||||
|
if (snodes.size() < 2) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
AnfNodePtr clean_all_node;
|
||||||
|
std::tie(clean_all_node, std::ignore) = FuseNodesToSubGraph(snodes, func_graph, "clean_all");
|
||||||
|
MS_LOG(INFO) << "Add node to clean batch buffers in once(" << clean_all_node->fullname_with_scope()
|
||||||
|
<< ") for atomic add!";
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (changed) {
|
||||||
|
mng->RemoveRoots();
|
||||||
|
mng->KeepRoots({func_graph});
|
||||||
|
}
|
||||||
|
return changed;
|
||||||
|
}
|
||||||
|
} // namespace opt
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,34 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
|
||||||
|
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include "backend/optimizer/common/pass.h"
|
||||||
|
#include "ir/func_graph.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace opt {
|
||||||
|
class CleanAllInOnce : public Pass {
|
||||||
|
public:
|
||||||
|
CleanAllInOnce() : Pass("clean_all_in_once") {}
|
||||||
|
~CleanAllInOnce() override = default;
|
||||||
|
bool Run(const FuncGraphPtr &func_graph);
|
||||||
|
};
|
||||||
|
using CleanAllInOncePtr = std::shared_ptr<CleanAllInOnce>;
|
||||||
|
} // namespace opt
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
|
|
@ -38,6 +38,7 @@
|
||||||
#include "backend/optimizer/graph_kernel/add_atomic_clean_gpu.h"
|
#include "backend/optimizer/graph_kernel/add_atomic_clean_gpu.h"
|
||||||
#include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
|
#include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
|
||||||
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
|
#include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
|
||||||
|
#include "backend/optimizer/graph_kernel/clean_all_in_once.h"
|
||||||
#include "backend/optimizer/graph_kernel/eliminate_redundant_output.h"
|
#include "backend/optimizer/graph_kernel/eliminate_redundant_output.h"
|
||||||
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
#include "backend/optimizer/graph_kernel/tensor_promotion.h"
|
||||||
#include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
|
#include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
|
||||||
|
@ -182,6 +183,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
|
||||||
// will be exposed, use GetitemTuple Pass to delete them.
|
// will be exposed, use GetitemTuple Pass to delete them.
|
||||||
pm->AddPass(std::make_shared<opt::GetitemTuple>());
|
pm->AddPass(std::make_shared<opt::GetitemTuple>());
|
||||||
pm->AddPass(std::make_shared<opt::AtomicCleanInsertter>());
|
pm->AddPass(std::make_shared<opt::AtomicCleanInsertter>());
|
||||||
|
pm->AddPass(std::make_shared<opt::CleanAllInOnce>());
|
||||||
pm->AddPass(std::make_shared<opt::BindValueToGraph>());
|
pm->AddPass(std::make_shared<opt::BindValueToGraph>());
|
||||||
optimizer->AddPassManager(pm);
|
optimizer->AddPassManager(pm);
|
||||||
(void)optimizer->Optimize(kernel_graph);
|
(void)optimizer->Optimize(kernel_graph);
|
||||||
|
|
Loading…
Reference in New Issue