From d471ac491e6979b5cdd54d7c7d9ed06ad965103d Mon Sep 17 00:00:00 2001
From: liuxiao93
Date: Thu, 12 Nov 2020 11:03:50 +0800
Subject: [PATCH] Adapt DynamicGRUV2 forward for Ascend new backend.

---
 .../kernel_compiler/tbe/tbe_kernel_build.cc   | 66 ++++++++-----
 .../kernel_compiler/tbe/tbe_kernel_build.h    |  3 +
 .../ascend/ascend_backend_optimization.cc     |  4 +
 .../add_placeholder_for_dynamic_gru.cc        | 82 ++++++++++++++++
 .../add_placeholder_for_dynamic_gru.h         | 37 ++++++++
 .../insert_transpose_for_dyanmic_gru_v2.h     | 40 ++++++++
 .../insert_transpose_for_dynamic_gru_v2.cc    | 94 +++++++++++++++++++
 .../device/ascend/kernel_select_ascend.cc     |  2 +-
 .../device/ascend/tasksink/task_generator.cc  |  7 ++
 mindspore/ccsrc/utils/utils.h                 |  5 +-
 mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py  |  4 +-
 .../ops/_op_impl/tbe/gru_v2_hidden_grad.py    |  2 +-
 mindspore/ops/operations/_grad_ops.py         |  9 +-
 mindspore/ops/operations/_inner_ops.py        | 40 ++++----
 tests/st/ops/ascend/test_dynamic_gru_v2.py    | 43 +++++++++
 tests/ut/python/ops/test_ops.py               |  6 +-
 16 files changed, 390 insertions(+), 54 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc
 create mode 100644 tests/st/ops/ascend/test_dynamic_gru_v2.py

diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
index 65ea2c6f40f..f9642dd1f5b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc
@@ -147,6 +147,39 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<AnfNode
   return true;
 }
 
+void TbeKernelJsonCreator::GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> &input_ptr, size_t input_i,
+                                                std::vector<nlohmann::json> *input_list) {
+  nlohmann::json input_desc_json;
+  auto in_name = input_ptr->name();
+  input_desc_json[kJName] = in_name + std::to_string(input_i);
+  input_desc_json[kJValid] = false;
+  input_list->emplace_back(input_desc_json);
+}
+
+void TbeKernelJsonCreator::GenValidInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
+                                                 bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
+                                                 const string &op_input_name, size_t input_i,
+                                                 std::vector<nlohmann::json> *input_list) {
+  auto dtype = GetDeviceInputType(anf_node, real_input_index);
+  auto format = GetDeviceInputFormat(anf_node, real_input_index);
+  auto shape = GetDeviceInputShape(anf_node, real_input_index);
+  auto ori_shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index);
+  if (ori_shape.empty()) {
+    ori_shape.emplace_back(1);
+  }
+  nlohmann::json input_desc_json;
+  input_desc_json[kJDtype] = dtype;
+  input_desc_json[kJName] = op_input_name + std::to_string(input_i);
+  input_desc_json[kJOriShape] = ori_shape;
+  input_desc_json[kJOriFormat] = kOpFormat_NCHW;
+  input_desc_json[kJShape] = shape;
+  input_desc_json[kJFormat] = format;
+  input_desc_json[kJValid] = value;
+  input_desc_json[kJParamType] = input_ptr->param_type();
+  input_desc_json[kJRange] = tbe::TbeDynamicShapeUtil::GetInputDynamicRange(anf_node, real_input_index);
+  input_list->emplace_back(input_desc_json);
+}
+
 bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
                                             bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
                                             const string &op_input_name, size_t input_i,
@@ -156,32 +189,19 @@ bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr<AnfNode> &anf_
   MS_EXCEPTION_IF_NULL(input_list);
   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
   if (op_name == kDynamicRNNOpName && input_ptr->name() == "seq_length") {
-    nlohmann::json input_desc_json;
-    auto in_name = input_ptr->name();
-    input_desc_json[kJName] = in_name + std::to_string(input_i);
-    input_desc_json[kJValid] = false;
-    input_list->emplace_back(input_desc_json);
+    GenNoneInputDescJson(input_ptr, input_i, input_list);
+  } else if (op_name == kDynamicGRUV2OpName) {
+    auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node, "placeholder_index");
+    auto item = find(none_index.begin(), none_index.end(), input_ptr->index());
+    if (item != none_index.end()) {
+      GenNoneInputDescJson(input_ptr, input_i, input_list);
+    } else {
+      GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
+    }
   } else if (input_ptr->name() == "input_indices" && op_name == kTopKOpName) {
     TbeAdapter::GenTopKV2IndicesTensorInfo(anf_node, real_input_index, input_list, creater_type_);
   } else {
-    auto dtype = GetDeviceInputType(anf_node, real_input_index);
-    auto format = GetDeviceInputFormat(anf_node, real_input_index);
-    auto shape = GetDeviceInputShape(anf_node, real_input_index);
-    auto ori_shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_input_index);
-    if (ori_shape.empty()) {
-      ori_shape.emplace_back(1);
-    }
-    nlohmann::json input_desc_json;
-    input_desc_json[kJDtype] = dtype;
-    input_desc_json[kJName] = op_input_name + std::to_string(input_i);
-    input_desc_json[kJOriShape] = ori_shape;
-    input_desc_json[kJOriFormat] = kOpFormat_NCHW;
-    input_desc_json[kJShape] = shape;
-    input_desc_json[kJFormat] = format;
-    input_desc_json[kJValid] = value;
-    input_desc_json[kJParamType] = input_ptr->param_type();
-    input_desc_json[kJRange] = tbe::TbeDynamicShapeUtil::GetInputDynamicRange(anf_node, real_input_index);
-    input_list->emplace_back(input_desc_json);
+    GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
   }
   return true;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
index 05e6c09af24..23df11d3ee6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h
@@ -111,6 +111,9 @@ class TbeKernelJsonCreator {
   void GenOutputList(const std::shared_ptr<AnfNode> &anf_node, const size_t &output_obj_num,
                      const std::shared_ptr<OpIOInfo> &output_ptr, size_t *output_idx,
                      std::vector<nlohmann::json> *output_list);
+  void GenValidInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
+                             const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
+                             std::vector<nlohmann::json> *input_list);
   std::vector<size_t> GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const;
   std::string GetDeviceInputType(const AnfNodePtr &anf_node, size_t real_index) const;
   std::string GetDeviceInputFormat(const AnfNodePtr &anf_node, size_t real_index) const;
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
index 7924d068882..74c643fd5fb 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
@@ -63,6 +63,7 @@
 #include "backend/optimizer/ascend/format_type/insert_trans_op.h"
 #include "backend/optimizer/ascend/format_type/dynamic_rnn_grad_reformat.h"
 #include "backend/optimizer/ascend/format_type/insert_transpose_for_basiclstm_op.h"
+#include "backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h"
 #include "backend/optimizer/ascend/format_type/rectify_do_mask_kernel_info.h"
 #include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h"
 #include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h"
@@ -110,6 +111,7 @@
 #include "backend/optimizer/ascend/ir_fission/pack_fission.h"
 #include "backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.h"
 #include "backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_rnn.h"
+#include "backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h"
 #include "utils/ms_context.h"
 #include "backend/optimizer/graph_kernel/composite_ops_fusion.h"
 #include "backend/optimizer/graph_kernel/basic_ops_fusion.h"
@@ -222,6 +224,7 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph)
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
+  data_layout_pm->AddPass(std::make_shared<InsertTransposeForDynamicGRUV2>());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
@@ -278,6 +281,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared<InsertPlaceholderForDynamicGRUV2>());
   ir_fusion_pm->AddPass(std::make_shared());
   AddAscendIRFusionRulesPass(ir_fusion_pm.get());
   AddAscendIRFusionPass(ir_fusion_pm.get());
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
new file mode 100644
index 00000000000..a3e5eb644bc
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.cc
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h"
+#include <vector>
+#include <memory>
+#include "backend/optimizer/common/helper.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "utils/utils.h"
+#include "abstract/abstract_value.h"
+#include "base/core_ops.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef InsertPlaceholderForDynamicGRUV2::DefinePattern() const {
+  std::shared_ptr<Var> V = std::make_shared<CondVar>(UnVisited);
+  std::shared_ptr<Var> Xs = std::make_shared<SeqVar>();
+  return VectorRef({V, Xs});
+}
+
+const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                                           const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  auto op_name = AnfAlgo::GetCNodeName(cnode);
+  if (op_name != kDynamicGRUV2OpName) {
+    return nullptr;
+  }
+  AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node);
+  auto kernel_graph = func_graph->cast<std::shared_ptr<session::KernelGraph>>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  size_t input_num = AnfAlgo::GetInputTensorNum(node);
+  if (input_num == 0) {
+    return nullptr;
+  }
+
+  std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)};
+  auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, "placeholder_index");
+  size_t real_input_index = 0;
+  for (size_t in_idx = 0; in_idx < input_num + none_index.size(); in_idx++) {
+    auto item = find(none_index.begin(), none_index.end(), in_idx);
+    if (item != none_index.end()) {
+      auto value = std::make_shared<None>();
+      auto value_node = NewValueNode(value);
+      value_node->set_abstract(std::make_shared<abstract::AbstractNone>());
+      auto new_node = kernel_graph->NewValueNode(value_node);
+      kernel_graph->AddValueNodeToGraph(new_node);
+      new_inputs.push_back(new_node);
+    } else {
+      auto input_node = AnfAlgo::GetInputNode(cnode, real_input_index);
+      new_inputs.push_back(input_node);
+      real_input_index++;
+    }
+  }
+
+  CNodePtr new_node = nullptr;
+  if (kernel_graph == nullptr) {
+    new_node = std::make_shared<CNode>(*cnode);
+  } else {
+    new_node = kernel_graph->NewCNode(cnode);
+  }
+  MS_EXCEPTION_IF_NULL(new_node);
+  new_node->set_inputs(new_inputs);
+  return new_node;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h
new file mode 100644
index 00000000000..b0d28853ba7
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/add_placeholder_for_dynamic_gru.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_PLACEHOLDER_FOR_DYNAMIC_GRU_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_PLACEHOLDER_FOR_DYNAMIC_GRU_H_
+
+#include <vector>
+#include <memory>
+#include "backend/optimizer/common/optimizer.h"
+#include "backend/optimizer/ascend/ascend_helper.h"
+
+namespace mindspore {
+namespace opt {
+class InsertPlaceholderForDynamicGRUV2 : public PatternProcessPass {
+ public:
+  explicit InsertPlaceholderForDynamicGRUV2(bool multigraph = true)
+      : PatternProcessPass("add_placeholder_for_dynamic_gru", multigraph) {}
+  ~InsertPlaceholderForDynamicGRUV2() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_PLACEHOLDER_FOR_DYNAMIC_GRU_H_
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h
new file mode 100644
index 00000000000..3d367f91ae6
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_INSERT_TRANSPOSE_FOR_DYANMIC_GRU_V2_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_INSERT_TRANSPOSE_FOR_DYANMIC_GRU_V2_H_
+
+#include <string>
+#include <memory>
+#include <vector>
+#include "backend/optimizer/common/optimizer.h"
+#include "backend/optimizer/common/helper.h"
+#include "backend/optimizer/ascend/ascend_helper.h"
+
+namespace mindspore {
+namespace opt {
+class InsertTransposeForDynamicGRUV2 : public PatternProcessPass {
+ public:
+  explicit InsertTransposeForDynamicGRUV2(bool multigraph = true)
+      : PatternProcessPass("insert_transpose_for_dynamic_gru_v2_op", multigraph) {}
+  ~InsertTransposeForDynamicGRUV2() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+}  // namespace opt
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_INSERT_TRANSPOSE_FOR_DYANMIC_GRU_V2_H_
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc
new file mode 100644
index 00000000000..63e31f50fc6
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/insert_transpose_for_dynamic_gru_v2.cc
@@ -0,0 +1,94 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/optimizer/ascend/format_type/insert_transpose_for_dyanmic_gru_v2.h"
+#include <memory>
+#include <vector>
+#include "utils/utils.h"
+#include "backend/optimizer/ascend/ascend_helper.h"
+#include "backend/session/anf_runtime_algorithm.h"
+#include "runtime/device/kernel_info.h"
+#include "backend/kernel_compiler/oplib/oplib.h"
+#include "utils/ms_context.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef InsertTransposeForDynamicGRUV2::DefinePattern() const {
+  VarPtr X = std::make_shared<Var>();
+  VarPtr X1 = std::make_shared<Var>();
+  VarPtr Xs = std::make_shared<SeqVar>();
+  MS_EXCEPTION_IF_NULL(X);
+  MS_EXCEPTION_IF_NULL(X1);
+  MS_EXCEPTION_IF_NULL(Xs);
+  return VectorRef(
+    {prim::kPrimDynamicGRUV2, X1, VectorRef({prim::KPrimTransData, VectorRef({prim::kPrimReshape, X})}), Xs});
+}
+
+CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(cnode);
+
+  for (size_t index = 0; index < cnode->inputs().size(); index++) {
+    if (index == 1 || index == 2) {
+      AnfNodePtr new_node = nullptr;
+      AnfNodePtr new_transdata_node = nullptr;
+      AnfNodePtr new_transpose_node = nullptr;
+      AnfNodePtr transdata_node = AnfAlgo::GetInputNode(cnode, index);
+      AnfNodePtr reshape_node = AnfAlgo::GetInputNode(transdata_node->cast<CNodePtr>(), 0);
+      auto input_format = AnfAlgo::GetInputFormat(transdata_node, 0);
+      auto output_format = AnfAlgo::GetOutputFormat(transdata_node, 0);
+      auto padding_axis = AnfAlgo::GetOutputReshapeType(transdata_node, 0);
+      KernelSelectPtr kernel_select = std::make_shared<KernelSelect>();
+      // trans default to hwcn
+      new_transpose_node = NewTransOpNode(func_graph, AnfAlgo::GetInputNode(transdata_node->cast<CNodePtr>(), 0),
+                                          kernel_select, false, prim::kPrimTranspose->name());
+      AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(std::vector<int64_t>{2, 3, 1, 0}), new_transpose_node);
+      AnfAlgo::SetNodeAttr("nop_op", MakeValue(true), new_transpose_node);
+      RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transpose_node);
+      // trans hwcn to output_format
+      new_transdata_node =
+        NewTransOpNode(func_graph, new_transpose_node, kernel_select, false, prim::KPrimTransData->name());
+      RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node, padding_axis);
+      new_transdata_node->set_abstract(transdata_node->abstract());
+      new_node = new_transdata_node;
+
+      FuncGraphManagerPtr manager = func_graph->manager();
+      MS_EXCEPTION_IF_NULL(manager);
+      manager->AddFuncGraph(func_graph);
+      if (!manager->Replace(transdata_node, new_node)) {
+        MS_LOG(EXCEPTION) << "For DynamicGRUV2, manager replace node failed";
+      }
+    }
+  }
+  return cnode;
+}
+
+const AnfNodePtr InsertTransposeForDynamicGRUV2::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                                         const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
+  AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  auto op_name = AnfAlgo::GetCNodeName(cnode);
+  CNodePtr new_node = nullptr;
+  if (op_name == kDynamicGRUV2OpName) {
+    new_node = Insert(func_graph, cnode);
+  }
+  return new_node;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
index fe6056ef0da..3108471cdf3 100644
--- a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
@@ -59,7 +59,7 @@ bool MatchInferOutputDataType(const CNodePtr &cnode, const kernel::KernelBuildIn
   auto name = AnfAlgo::GetCNodeName(cnode);
   for (size_t input_index = 0; input_index < kernel_build_info.GetInputNum(); ++input_index) {
     TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(cnode, input_index);
-    if (name == kDynamicRNNOpName && input_origin_type == kMetaTypeNone) {
+    if ((name == kDynamicRNNOpName || name == kDynamicGRUV2OpName) && input_origin_type == kMetaTypeNone) {
       continue;
     }
     if (kernel_build_info.GetInputDeviceType(input_index) != input_origin_type) {
diff --git a/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc
index 1f27ecf889d..3378e376469 100644
--- a/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/tasksink/task_generator.cc
@@ -133,6 +133,13 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
     if (op_name == kDynamicRNNOpName && i == 3) {
       continue;
     }
+    if (op_name == kDynamicGRUV2OpName) {
+      auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node_ptr, "placeholder_index");
+      auto item = find(none_index.begin(), none_index.end(), i);
+      if (item != none_index.end()) {
+        continue;
+      }
+    }
     auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
     auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index);
     AddressPtr input = std::make_shared<Address>();
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 6530201bf7e..be6bee3aa4e 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -227,8 +227,8 @@ constexpr auto kBasicLSTMCellInputGradOpName = "BasicLSTMCellInputGrad";
 constexpr auto kBasicLSTMCellOpName = "BasicLSTMCell";
 constexpr auto kDynamicRNNOpName = "DynamicRNN";
 constexpr auto kLSTMInputGradOpName = "LSTMInputGrad";
-constexpr auto kDynamicGRUOpName = "DynamicGRU";
-constexpr auto kGRUV2HiddenGrad = "GRUV2HiddenGrad";
+constexpr auto kDynamicGRUV2OpName = "DynamicGRUV2";
+constexpr auto kGRUV2HiddenGradOpName = "GRUV2HiddenGrad";
 constexpr auto kFusedSparseFtrlName = "FusedSparseFtrl";
 constexpr auto kFusedSparseProximalAdagradName = "FusedSparseProximalAdagrad";
 constexpr auto kFusedSparseLazyAdamName = "FusedSparseLazyAdam";
@@ -239,6 +239,7 @@ constexpr auto kLARSUpdateName = "LARSUpdate";
 constexpr auto kBasicLSTMCellCStateGradOpName = "BasicLSTMCellCStateGrad";
 constexpr auto kBasicLSTMCellCStateGradV2OpName = "BasicLSTMCellCStateGradV2";
 constexpr auto kMatMulV2OpName = "MatMulV2";
+constexpr auto kBroadcastToOpName = "BroadcastTo";
 
 // Hcom Op Type
 constexpr auto kHcomOpTypeAllReduce = "HcomAllReduce";
diff --git a/mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py b/mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py
index b3d2c99392d..8ef089e04aa 100644
--- a/mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py
+++ b/mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py
@@ -34,8 +34,8 @@ dynamic_gru_v2_op_info = TBERegOp("DynamicGRUV2") \
     .attr("is_training", "optional", "bool", "all", "true") \
     .partial_flag(True) \
     .input(0, "x", False, "required", "all") \
-    .input(1, "weight_input", False, "required", "all") \
-    .input(2, "weight_hidden", False, "required", "all") \
+    .input(1, "weight_input", False, "required", "all", reshape_type="CN") \
+    .input(2, "weight_hidden", False, "required", "all", reshape_type="CN") \
     .input(3, "bias_input", False, "optional", "all") \
     .input(4, "bias_hidden", False, "optional", "all") \
     .input(5, "seq_length", False, "optional", "all") \
diff --git a/mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py b/mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py
index 99d04758f04..a3b02bf8717 100644
--- a/mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py
+++ b/mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py
@@ -22,7 +22,7 @@ gru_v2_hidden_grad_op_info = TBERegOp("GRUV2HiddenGrad") \
     .binfile_name("gru_v2_hidden_grad.so") \
     .compute_cost(10) \
     .kernel_name("gru_v2_hidden_grad") \
-    .attr("gate_order", "optional", "str", "all", "zrh") \
+    .attr("gate_order", "optional", "str", "all", "rzh") \
     .partial_flag(True) \
     .input(0, "weight_input", False, "required", "all") \
     .input(1, "init_h", False, "required", "all") \
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index 291d90713d6..05c18afab90 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -1210,7 +1210,7 @@ class DynamicGRUV2Grad(PrimitiveWithInfer):
                  num_proj=0,
                  time_major=True,
                  bias_type="double_bias",
-                 gate_order="zrh",
+                 gate_order="rzh",
                  reset_after=True):
         self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name)
         self.keep_prob = validator.check_value_type("keep_prob", keep_prob, [float], self.name)
@@ -1266,12 +1266,13 @@ class DynamicGRUV2Grad(PrimitiveWithInfer):
     def infer_dtype(self, x_dtype, winput_dtype, whidden_dtype, y_dtype, init_h_dtype, h_dtype,
                     dy_dtype, dh_dtype, update_dtype, reset_dtype, new_dtype, hnew_dtype, seq_dtype, mask_dtype):
         valid_types = (mstype.float16, mstype.float32)
-        args = {"y_dtype": y_dtype, "init_h_dtype": init_h_dtype, "h_dtype": h_dtype,
-                "dy_dtype": dy_dtype, "dh_dtype": dh_dtype, "update_dtype": update_dtype,
-                "reset_dtype": reset_dtype, "new_dtype": new_dtype, "hnew_dtype": hnew_dtype}
+        args = {"y_dtype": y_dtype, "h_dtype": h_dtype, "dy_dtype": dy_dtype,
+                "dh_dtype": dh_dtype, "update_dtype": update_dtype, "reset_dtype": reset_dtype,
+                "new_dtype": new_dtype, "hnew_dtype": hnew_dtype}
         validator.check_tensor_dtype_valid("x_dtype", x_dtype, valid_types, self.name)
         validator.check_tensor_dtype_valid("winput_dtype", winput_dtype, valid_types, self.name)
         validator.check_tensor_dtype_valid("whidden_dtype", whidden_dtype, valid_types, self.name)
+        validator.check_tensor_dtype_valid("init_h_dtype", init_h_dtype, valid_types, self.name)
         validator.check_tensors_dtypes_same_and_valid(args, valid_types, self.name)
         if seq_dtype is not None:
             validator.check_tensor_dtype_valid("seq_dtype", seq_dtype, valid_types, self.name)
diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py
index 05989b9cd03..1896e27a0e6 100644
--- a/mindspore/ops/operations/_inner_ops.py
+++ b/mindspore/ops/operations/_inner_ops.py
@@ -549,41 +549,49 @@ class DynamicGRUV2(PrimitiveWithInfer):
         self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name)
         self.add_prim_attr("io_format", "ND")
 
-    def infer_shape(self, x_shape, winput_shape, whidden_shape, binput_shape, bhidden_shape, seq_shape, h_shape):
+    def infer_shape(self, x_shape, winput_shape, whidden_shape,
+                    binput_shape=None, bhidden_shape=None, seq_shape=None, h_shape=None):
         validator.check_int(len(x_shape), 3, Rel.EQ, "x shape", self.name)
         validator.check_int(len(winput_shape), 2, Rel.EQ, "weight input shape rank", self.name)
         validator.check_int(len(whidden_shape), 2, Rel.EQ, "weight hidden shape rank", self.name)
-        if binput_shape is not None:
-            validator.check_int(len(binput_shape), 1, Rel.EQ, "bias input shape rank", self.name)
-        if bhidden_shape is not None:
-            validator.check_int(len(bhidden_shape), 1, Rel.EQ, "bias hidden shape rank", self.name)
-        if h_shape is not None:
-            validator.check_int(len(h_shape), 2, Rel.EQ, "init_h shape rank", self.name)
-        if seq_shape is not None:
-            raise ValueError(f"For {self.name}, seq_shape should be None.")
 
         num_step, batch_size, input_size = x_shape
         hidden_size = winput_shape[-1] // 3
-
         if winput_shape[-1] % 3 != 0:
             raise ValueError(f"For {self.name}, weight_input_shape[-1] should multiple of 3.")
-        validator.check("weight_input_shape[-1]", winput_shape[-1], "weight_hidden_shape[-1]",
-                        whidden_shape[-1], Rel.EQ, self.name)
-        validator.check("bias_input_shape", binput_shape, "bias_hidden_shape", bhidden_shape, Rel.EQ, self.name)
-        validator.check("weight_input_shape[0]", winput_shape[0], "input_size", input_size, Rel.EQ, self.name)
-        validator.check("weight_hidden_shape[0]", whidden_shape[0], "hidden_size", hidden_size, Rel.EQ, self.name)
+        self.placeholder_index = [3, 4, 5, 6]
+        if binput_shape is not None:
+            validator.check_int(len(binput_shape), 1, Rel.EQ, "bias input shape rank", self.name)
+            validator.check("bias_input_shape", binput_shape, "3 * hidden_shape", [3 * hidden_size], Rel.EQ, self.name)
+            self.placeholder_index.remove(3)
+        if bhidden_shape is not None:
+            validator.check_int(len(bhidden_shape), 1, Rel.EQ, "bias hidden shape rank", self.name)
+            validator.check("bias_hidden_shape", bhidden_shape,
+                            "3 * hidden_shape", [3 * hidden_size], Rel.EQ, self.name)
+            self.placeholder_index.remove(4)
         if h_shape is not None:
+            validator.check_int(len(h_shape), 2, Rel.EQ, "init_h shape rank", self.name)
             validator.check("init_h_shape[0]", h_shape[0], "batch_size", batch_size, Rel.EQ, self.name)
             validator.check("init_h_shape[1]", h_shape[1], "hidden_size", hidden_size, Rel.EQ, self.name)
+            self.placeholder_index.remove(6)
+        if seq_shape is not None:
+            raise ValueError(f"For {self.name}, seq_shape should be None.")
+
+        validator.check("weight_input_shape[-1]", winput_shape[-1], "weight_hidden_shape[-1]",
+                        whidden_shape[-1], Rel.EQ, self.name)
+        validator.check("weight_input_shape[0]", winput_shape[0], "input_size", input_size, Rel.EQ, self.name)
+        validator.check("weight_hidden_shape[0]", whidden_shape[0], "hidden_size", hidden_size, Rel.EQ, self.name)
 
         if self.num_proj > 0:
            y_shape = (num_step, batch_size, min(hidden_size, self.num_proj))
        else:
            y_shape = (num_step, batch_size, hidden_size)
        outh_shape = (num_step, batch_size, hidden_size)
+        self.add_prim_attr("placeholder_index", self.placeholder_index)
        return y_shape, outh_shape, outh_shape, outh_shape, outh_shape, outh_shape
 
-    def infer_dtype(self, x_dtype, winput_dtype, whidden_dtype, binput_dtype, bhidden_dtype, seq_dtype, h_dtype):
+    def infer_dtype(self, x_dtype, winput_dtype, whidden_dtype,
+                    binput_dtype=None, bhidden_dtype=None, seq_dtype=None, h_dtype=None):
         validator.check_tensor_dtype_valid("x dtype", x_dtype, [mstype.float16], self.name)
         validator.check_tensor_dtype_valid("weight input dtype", winput_dtype, [mstype.float16], self.name)
         validator.check_tensor_dtype_valid("weight hidden dtype", whidden_dtype, [mstype.float16], self.name)
diff --git a/tests/st/ops/ascend/test_dynamic_gru_v2.py b/tests/st/ops/ascend/test_dynamic_gru_v2.py
new file mode 100644
index 00000000000..6b59465e0ee
--- /dev/null
+++ b/tests/st/ops/ascend/test_dynamic_gru_v2.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.ops import operations as P
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+
+
+class DynamicGRUV2(nn.Cell):
+    def __init__(self):
+        super(DynamicGRUV2, self).__init__()
+        self.dynamic_gru = P.DynamicGRUV2()
+
+    def construct(self, x, weight_i, weight_h, bias_i, bias_h, init_h):
+        return self.dynamic_gru(x, weight_i, weight_h, bias_i, bias_h, None, init_h)
+
+
+def test_dynamic_gru_v2():
+    x = Tensor(np.random.rand(2, 8, 64).astype(np.float16))
+    weight_i = Tensor(np.random.rand(64, 48).astype(np.float16))
+    weight_h = Tensor(np.random.rand(16, 48).astype(np.float16))
+    bias_i = Tensor(np.random.rand(48).astype(np.float16))
+    bias_h = Tensor(np.random.rand(48).astype(np.float16))
+    init_h = Tensor(np.random.rand(8, 16).astype(np.float16))
+    gru_net = DynamicGRUV2()
+    output = gru_net(x, weight_i, weight_h, bias_i, bias_h, init_h)
+    print(output)
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index b4cdd45e12a..00b6aabf1de 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -2532,11 +2532,7 @@ test_case_other_ops = [
                         Tensor(np.random.rand(48).astype(np.float16)),
                         Tensor(np.random.rand(48).astype(np.float16)),
                         Tensor(np.random.rand(8, 16).astype(np.float16))],
-        'desc_bprop': [Tensor(np.random.rand(2, 8, 16).astype(np.float16)),
-                       Tensor(np.random.rand(2, 8, 16).astype(np.float16)),
-                       Tensor(np.random.rand(2, 8, 16).astype(np.float16)),
-                       Tensor(np.random.rand(2, 8, 16).astype(np.float16)),
-                       Tensor(np.random.rand(2, 8, 16).astype(np.float16))]}),
+        'skip': ['backward']}),
 ]
 
 test_case_quant_ops = [