diff --git a/akg b/akg
index f8f4e60bf3c..72b359ad457 160000
--- a/akg
+++ b/akg
@@ -1 +1 @@
-Subproject commit f8f4e60bf3c435cec41cbe48fe24901277ef9556
+Subproject commit 72b359ad457ed8f4f254c8a3bd2bde88967202fb
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
index 2e7c594e09a..b56723277e3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_json_generator.cc
@@ -558,7 +558,7 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
 bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf_nodes,
                                               const std::vector<AnfNodePtr> &input_list,
                                               const std::vector<AnfNodePtr> &output_list, nlohmann::json *kernel_json) {
-  if (anf_nodes.empty() || input_list.empty()) {
+  if (anf_nodes.empty()) {
     MS_LOG(ERROR) << "Invalid input size, anf_nodes [" << anf_nodes.size() << "], input_list [" << input_list.size()
                   << "].";
     return false;
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
index f753e8fba7d..2b73ff88404 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean_gpu.cc
@@ -374,13 +374,10 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
   // Create composite op's sub-graph.
   auto new_sub_graph = std::make_shared<FuncGraph>();
-  auto parameter = new_sub_graph->add_parameter();
-  parameter->set_abstract(value_node->abstract());
-  parameter->set_kernel_info(value_node->kernel_info_ptr());
-  AnfNodePtr broadcast_input_node = parameter;
+  AnfNodePtr broadcast_input_node = value_node;
   if (dst_type == kNumberTypeFloat16) {
-    AnfNodePtrList cast_inputs = {NewValueNode(prim::kPrimCast), parameter};
+    AnfNodePtrList cast_inputs = {NewValueNode(prim::kPrimCast), value_node};
     auto cast_node_inner =
       CreateCNode(cast_inputs, new_sub_graph, {.format = format, .shape = {1}, .type = TypeIdToType(dst_type)});
     AnfAlgo::SetNodeAttr("dst_type", MakeValue("float32"), cast_node_inner);
@@ -400,12 +397,13 @@ CNodePtr AtomicCleanInsertter::CreateAtomicCleanCompositeNode(const KernelGraphP
   // Makeup sub-graph.
   new_sub_graph->set_output(broadcast_to_node_inner);
-  auto broadcast_to_composite_node = main_graph->NewCNode({NewValueNode(new_sub_graph), value_node});
+  auto broadcast_to_composite_node = main_graph->NewCNode({NewValueNode(new_sub_graph)});
   broadcast_to_composite_node->set_abstract(broadcast_to_node_inner->abstract());
-  SetNewKernelInfo(broadcast_to_composite_node, new_sub_graph, {value_node}, {broadcast_to_node_inner},
-                   kernel::Processor::CUDA);
+  SetNewKernelInfo(broadcast_to_composite_node, new_sub_graph, {}, {broadcast_to_node_inner},
+                   AnfAlgo::GetProcessor(atomic_add_node_));
   auto graph_attr = ExtractGraphKernelName(TopoSort(new_sub_graph->get_return()), "", "atomic_clean");
   new_sub_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, MakeValue(graph_attr));
+  new_sub_graph->set_attr("composite_type", MakeValue("atomic_clean"));
   return broadcast_to_composite_node;
 }
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.cc
new file mode 100644
index 00000000000..4354cddee48
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.cc
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/graph_kernel/clean_all_in_once.h"
+#include <algorithm>
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "backend/session/anf_runtime_algorithm.h"
+#include "backend/kernel_compiler/common_utils.h"
+#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
+
+namespace mindspore {
+namespace opt {
+namespace {
+ShapeVector GetValidShape(const AnfNodePtr &node) {
+  // The returned shape will not contain leading 1s.
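+  // e.g. a buffer of shape (1, 1, 32, 64) yields a valid shape of (32, 64), so clean nodes whose
+  // buffers have the same effective size fall into the same group in CleanAllInOnce::Run below.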
+  auto shape = GetShape(node);
+  ShapeVector valid_shape;
+  bool valid = false;
+  for (auto s : shape) {
+    if (!valid && s == 1) {
+      continue;
+    }
+    valid = true;
+    valid_shape.push_back(s);
+  }
+  return valid_shape;
+}
+
+bool IsAtomicCleanNode(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  auto func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kAnfPrimitiveIndex));
+  MS_EXCEPTION_IF_NULL(func_graph);
+  if (!func_graph->has_attr("composite_type")) {
+    return false;
+  }
+
+  auto ctype_value = func_graph->get_attr("composite_type");
+  if (!ctype_value->isa<StringImm>()) {
+    MS_LOG(EXCEPTION) << "Attribute composite_type should be a string!";
+  }
+  auto ctype = GetValue<std::string>(ctype_value);
+  return ctype == "atomic_clean";
+}
+
+std::vector<AnfNodePtrList> SplitVectorByWidth(const AnfNodePtrList &nodes, int width) {
+  std::vector<AnfNodePtrList> splitted_nodes;
+  if (nodes.empty()) {
+    return splitted_nodes;
+  }
+
+  int num = (nodes.size() - 1) / width + 1;
+  splitted_nodes.resize(num);
+  for (size_t i = 0; i < nodes.size(); ++i) {
+    splitted_nodes[i / width].push_back(nodes[i]);
+  }
+  return splitted_nodes;
+}
+}  // namespace
+
+bool CleanAllInOnce::Run(const FuncGraphPtr &func_graph) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  auto mng = func_graph->manager();
+  if (mng == nullptr) {
+    mng = Manage(func_graph, true);
+    func_graph->set_manager(mng);
+  }
+  auto todos = TopoSort(func_graph->get_return());
+  std::map<ShapeVector, AnfNodePtrList> clean_map;
+  std::for_each(todos.cbegin(), todos.cend(), [&clean_map](const AnfNodePtr &node) {
+    if (AnfAlgo::IsGraphKernel(node) && IsAtomicCleanNode(node)) {
+      auto valid_shape = GetValidShape(node);
+      auto iter = clean_map.find(valid_shape);
+      if (iter != clean_map.end()) {
+        iter->second.push_back(node);
+      } else {
+        clean_map.insert({valid_shape, {node}});
+      }
+    }
+  });
+
+  bool changed = false;
+  if (!clean_map.empty()) {
+    for (auto iter : clean_map) {
+      // Cleaning all buffers in one node is not good, so clean at most ten in one node.
+      auto splitted_nodes = SplitVectorByWidth(iter.second, 10);
+      for (auto &snodes : splitted_nodes) {
+        if (snodes.size() < 2) {
+          continue;
+        }
+        AnfNodePtr clean_all_node;
+        std::tie(clean_all_node, std::ignore) = FuseNodesToSubGraph(snodes, func_graph, "clean_all");
+        MS_LOG(INFO) << "Add node to clean batch buffers in once(" << clean_all_node->fullname_with_scope()
+                     << ") for atomic add!";
+        changed = true;
+      }
+    }
+  }
+
+  if (changed) {
+    mng->RemoveRoots();
+    mng->KeepRoots({func_graph});
+  }
+  return changed;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.h
new file mode 100644
index 00000000000..59a8d622f9f
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/clean_all_in_once.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
+#include <memory>
+#include <string>
+#include "backend/optimizer/common/pass.h"
+#include "ir/func_graph.h"
+
+namespace mindspore {
+namespace opt {
+class CleanAllInOnce : public Pass {
+ public:
+  CleanAllInOnce() : Pass("clean_all_in_once") {}
+  ~CleanAllInOnce() override = default;
+  bool Run(const FuncGraphPtr &func_graph) override;
+};
+using CleanAllInOncePtr = std::shared_ptr<CleanAllInOnce>;
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GRAPH_KERNEL_CLEAN_ALL_IN_ONCE_H_
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index 994c5c5f6d5..b239ec5f304 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -38,6 +38,7 @@
 #include "backend/optimizer/graph_kernel/add_atomic_clean_gpu.h"
 #include "backend/optimizer/graph_kernel/arithmetic_simplify.h"
 #include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
+#include "backend/optimizer/graph_kernel/clean_all_in_once.h"
 #include "backend/optimizer/graph_kernel/eliminate_redundant_output.h"
 #include "backend/optimizer/graph_kernel/tensor_promotion.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h"
@@ -182,6 +183,7 @@ void GPUSession::GraphKernelOptimize(const std::shared_ptr<KernelGraph> &kernel_
   // will be exposed, use GetitemTuple Pass to delete them.
   pm->AddPass(std::make_shared<opt::GetitemTuple>());
   pm->AddPass(std::make_shared<opt::AtomicCleanInsertter>());
+  pm->AddPass(std::make_shared<opt::CleanAllInOnce>());
   pm->AddPass(std::make_shared());
   optimizer->AddPassManager(pm);
   (void)optimizer->Optimize(kernel_graph);
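
For reference, below is a minimal standalone sketch of the grouping and batching strategy that CleanAllInOnce::Run introduces above: atomic-clean composites are grouped by their buffer shape with leading 1s stripped, and each group is fused in batches of at most ten nodes. It uses only standard C++ containers; the names ValidShape and SplitByWidth and the sample shapes are illustrative stand-ins for the patch's GetValidShape and SplitVectorByWidth, not MindSpore APIs.

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <vector>

    using Shape = std::vector<int64_t>;

    // Drop leading 1s so that e.g. (1, 1, 32) and (32) describe the same effective buffer size.
    Shape ValidShape(const Shape &shape) {
      size_t i = 0;
      while (i < shape.size() && shape[i] == 1) {
        ++i;
      }
      return Shape(shape.begin() + i, shape.end());
    }

    // Split one group into batches of at most `width` elements (the pass uses width = 10).
    std::vector<std::vector<int>> SplitByWidth(const std::vector<int> &nodes, size_t width) {
      std::vector<std::vector<int>> batches;
      if (nodes.empty()) {
        return batches;
      }
      batches.resize((nodes.size() - 1) / width + 1);
      for (size_t i = 0; i < nodes.size(); ++i) {
        batches[i / width].push_back(nodes[i]);
      }
      return batches;
    }

    int main() {
      // Stand-ins for the atomic-clean composite nodes found in topological order,
      // represented here only by an index and the shape of the buffer they clean.
      std::vector<Shape> buffer_shapes = {{1, 32}, {32}, {1, 1, 32}, {16, 16}, {1, 16, 16}};
      std::map<Shape, std::vector<int>> groups;  // valid shape -> indices of clean nodes
      for (int i = 0; i < static_cast<int>(buffer_shapes.size()); ++i) {
        groups[ValidShape(buffer_shapes[i])].push_back(i);
      }
      for (const auto &kv : groups) {
        for (const auto &batch : SplitByWidth(kv.second, 10)) {
          if (batch.size() < 2) {
            continue;  // a lone clean node is left as-is, just like in the pass
          }
          std::cout << "fuse " << batch.size() << " clean nodes whose valid shape has rank " << kv.first.size() << "\n";
        }
      }
      return 0;
    }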