From b1ee7d99261fe36604647a749b233e467e2f1f35 Mon Sep 17 00:00:00 2001
From: yanghaoran
Date: Tue, 29 Dec 2020 19:34:32 +0800
Subject: [PATCH] Synchronize latest Ascend software suite 29 Dec 2020

---
 graphengine                                    |  2 +-
 .../ir_fission/dynamic_rnn_grad_fission_v2.cc  | 57 ++++++++++++++++---
 .../graph_ir/op_declare/rnn_declare.cc         |  1 -
 mindspore/core/ir/tensor.cc                    |  2 +
 mindspore/ops/_grad/grad_nn_ops.py             |  2 +-
 .../tbe/basic_lstm_cell_c_state_grad_v2.py     |  2 +-
 mindspore/ops/operations/_grad_ops.py          |  4 --
 7 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/graphengine b/graphengine
index c762dd5dcc2..1b4f8577626 160000
--- a/graphengine
+++ b/graphengine
@@ -1 +1 @@
-Subproject commit c762dd5dcc207987d5b5d4ee520da3939222ec88
+Subproject commit 1b4f85776269f567d11153807ae7badc91803083
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_rnn_grad_fission_v2.cc b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_rnn_grad_fission_v2.cc
index d887f366138..ff3563d70b0 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_rnn_grad_fission_v2.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/ir_fission/dynamic_rnn_grad_fission_v2.cc
@@ -16,8 +16,10 @@
 #include "backend/optimizer/ascend/ir_fission/dynamic_rnn_grad_fission_v2.h"
 #include <vector>
 #include <memory>
+#include "backend/session/kernel_graph.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "utils/trace_base.h"
+#include "utils/tensor_construct_utils.h"
 
 namespace mindspore {
 namespace opt {
@@ -46,7 +48,7 @@ void CreateTLoopNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn
     std::vector<size_t> output0_dims{origin_input9_shape[0], 4 * (((origin_input9_shape[1] + 15) / 16) * 16)};
     std::vector<size_t> output1_dims{input_i_shape[1], input_i_shape[2]};
-    AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32, kNumberTypeFloat32}, {output0_dims, output1_dims},
+    AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16, kNumberTypeFloat32}, {output0_dims, output1_dims},
                                         basic_lstm_cell_c_state_grad.get());
     AnfAlgo::SetNodeAttr("forget_bias", MakeValue(1.0f), basic_lstm_cell_c_state_grad);
     AnfAlgo::SetNodeAttr("activation", MakeValue("Tanh"), basic_lstm_cell_c_state_grad);
@@ -260,7 +262,7 @@ AnfNodePtr AddLSTMInputGradNode(const FuncGraphPtr &func_graph, const CNodePtr &
   // Create lstm_gage_concat
   auto lstm_gage_concat = func_graph->NewCNode(lstm_gage_concat_input);
   auto origin_input7_shape = AnfAlgo::GetOutputInferShape(origin_input7, 0);
-  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32},
+  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16},
                                       {{origin_input7_shape[0], origin_input7_shape[1], 4 * origin_input7_shape[2]}},
                                       lstm_gage_concat.get());
   AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToLong(num_split_x)), lstm_gage_concat);
@@ -413,6 +415,24 @@ AnfNodePtr CreateBatchMatMul(const FuncGraphPtr &func_graph, const AnfNodePtr &l
   return batch_matmul;
 }
 
+AnfNodePtr CreateBatchMatMul2(const FuncGraphPtr &func_graph, const AnfNodePtr &lstm_input_grad,
+                              const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  // Create node
+  std::vector<AnfNodePtr> matmul_inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimBatchMatMul->name())),
+                                           node, lstm_input_grad};
+  auto batch_matmul = func_graph->NewCNode(matmul_inputs);
+  // Set infer data type and shape
+  auto out_shape = {AnfAlgo::GetOutputInferShape(lstm_input_grad, 0)[0], IntToSize(1),
+                    AnfAlgo::GetOutputInferShape(lstm_input_grad, 0)[2]};
+  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {out_shape}, batch_matmul.get());
+  // Set attr
+  AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), batch_matmul);
+  AnfAlgo::SetNodeAttr("transpose_x1", MakeValue(false), batch_matmul);
+  AnfAlgo::SetNodeAttr("transpose_x2", MakeValue(false), batch_matmul);
+  return batch_matmul;
+}
+
 AnfNodePtr CreateDwReduceSum(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn_grad_cnode,
                              const AnfNodePtr &batch_matmul) {
   MS_EXCEPTION_IF_NULL(func_graph);
@@ -430,18 +450,38 @@ AnfNodePtr CreateDwReduceSum(const FuncGraphPtr &func_graph, const CNodePtr &dyn
   return reduce_sum;
 }
 
+AnfNodePtr CreateValueNode(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn_grad_cnode) {
+  auto origin_input7 = dynamic_rnn_grad_cnode->input(8);
+  auto origin_input7_shape = AnfAlgo::GetOutputInferShape(origin_input7, 0);
+  auto t_size = origin_input7_shape[0];
+  auto n_size = origin_input7_shape[1];
+
+  std::vector<size_t> shape = {t_size, IntToSize(1), n_size};
+  std::vector<int64_t> output_shape = {SizeToLong(t_size), SizeToLong(1), SizeToLong(n_size)};
+  std::vector<int64_t> output_tensor = {(SizeToLong(n_size) + SizeToLong(15)) / SizeToLong(16) * SizeToLong(16) *
+                                        SizeToLong(16) * SizeToLong(t_size)};
+  auto tensor = TensorConstructUtils::CreateOnesTensor(kNumberTypeFloat32, output_tensor);
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, output_shape);
+  auto kernel_graph = func_graph->cast<KernelGraphPtr>();
+  auto value_node = kernel_graph->NewValueNode(x_abstract, tensor);
+  kernel_graph->AddValueNodeToGraph(value_node);
+  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {shape}, value_node.get());
+  return value_node;
+}
+
 AnfNodePtr CreateDbReduceSum(const FuncGraphPtr &func_graph, const CNodePtr &dynamic_rnn_grad_cnode,
-                             const AnfNodePtr &lstm_input_grad) {
+                             const AnfNodePtr &lstm_input_grad, const AnfNodePtr &value_node) {
   MS_EXCEPTION_IF_NULL(func_graph);
   // Create node
+  auto batch_matmul = CreateBatchMatMul2(func_graph, lstm_input_grad, value_node);
   std::vector<AnfNodePtr> reduce_sum_inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimReduceSum->name())),
-                                               lstm_input_grad};
+                                               batch_matmul};
   auto reduce_sum = func_graph->NewCNode(reduce_sum_inputs);
   // Set infer data type and shape
-  AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(dynamic_rnn_grad_cnode, 1)},
-                                      {AnfAlgo::GetOutputInferShape(dynamic_rnn_grad_cnode, 1)}, reduce_sum.get());
+  auto out_shape = {AnfAlgo::GetOutputInferShape(lstm_input_grad, 0)[2]};
+  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat16}, {out_shape}, reduce_sum.get());
   // Set attr
-  AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector<int64_t>{0, 1}), reduce_sum);
+  AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(std::vector<int64_t>{0}), reduce_sum);
   AnfAlgo::SetNodeAttr(kAttrKeepDims, MakeValue(false), reduce_sum);
   AnfAlgo::SetNodeAttr("is_backend_insert", MakeValue(true), reduce_sum);
   return reduce_sum;
@@ -486,8 +526,9 @@ const AnfNodePtr DynamicRnnGradFissionV2::Process(const FuncGraphPtr &func_graph
     make_tuple_inputs.emplace_back(batch_matmul);
   }
 
+  auto value_node = CreateValueNode(func_graph, dynamic_rnn_grad_cnode);
   // create reduce_sum_2
-  auto db_reduce_sum = CreateDbReduceSum(func_graph, dynamic_rnn_grad_cnode, lstm_input_grad);
+  auto db_reduce_sum = CreateDbReduceSum(func_graph, dynamic_rnn_grad_cnode, lstm_input_grad, value_node);
   make_tuple_inputs.emplace_back(db_reduce_sum);
   make_tuple_inputs.insert(make_tuple_inputs.end(), new_outputs.begin(), new_outputs.end());
   auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/rnn_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/rnn_declare.cc
index a008d919ed2..f9ded5a9b2a 100644
--- a/mindspore/ccsrc/transform/graph_ir/op_declare/rnn_declare.cc
+++ b/mindspore/ccsrc/transform/graph_ir/op_declare/rnn_declare.cc
@@ -124,7 +124,6 @@ ATTR_MAP(DynamicGRUV2Grad) = {{"direction", ATTR_DESC(direction, AnyTraits<std::string>())},
                               {"num_proj", ATTR_DESC(num_proj, AnyTraits<int64_t>())},
                               {"time_major", ATTR_DESC(time_major, AnyTraits<bool>())},
-                              {"bias_type", ATTR_DESC(bias_type, AnyTraits<std::string>())},
                               {"gate_order", ATTR_DESC(gate_order, AnyTraits<std::string>())},
                               {"reset_after", ATTR_DESC(reset_after, AnyTraits<bool>())}};
 OUTPUT_MAP(DynamicGRUV2Grad) = {{0, OUTPUT_DESC(dw_input)}, {1, OUTPUT_DESC(dw_hidden)}, {2, OUTPUT_DESC(db_input)},
diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc
index 8107322b93c..8f97f82729a 100644
--- a/mindspore/core/ir/tensor.cc
+++ b/mindspore/core/ir/tensor.cc
@@ -429,6 +429,8 @@ TensorDataPtr MakeTensorData(TypeId data_type, const ShapeVector &shape, const A
       return std::make_shared<TensorDataImpl<uint64_t>>(shape, args...);
     case kNumberTypeFloat16:
       return std::make_shared<TensorDataImpl<float16>>(shape, args...);
+    case kNumberTypeFloat:
+      return std::make_shared<TensorDataImpl<float>>(shape, args...);
     case kNumberTypeFloat32:
       return std::make_shared<TensorDataImpl<float>>(shape, args...);
     case kNumberTypeFloat64:
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py
index f259361e056..2ed2e3ad583 100755
--- a/mindspore/ops/_grad/grad_nn_ops.py
+++ b/mindspore/ops/_grad/grad_nn_ops.py
@@ -956,7 +956,7 @@ def get_bprop_dynamic_rnn(self):
 def get_bprop_dynamic_gru_v2(self):
     """Grad definition for `DynamicGRUV2` operation."""
     dynamic_gru_v2_grad = G.DynamicGRUV2Grad(self.direction, self.cell_depth, self.keep_prob, self.cell_clip,
-                                             self.num_proj, self.time_major, 'double_bias', self.gate_order,
+                                             self.num_proj, self.time_major, self.gate_order,
                                              self.reset_after)
 
     def bprop(x, winput, whidden, binput, bhidden, seq, init_h, out, dout):
diff --git a/mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py b/mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py
index 37dc160b583..6e54b077d21 100644
--- a/mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py
+++ b/mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py
@@ -38,7 +38,7 @@ basic_lstm_cell_c_state_grad_op_info_v2 = TBERegOp("BasicLSTMCellCStateGradV2")
     .output(1, "dct_1", False, "required", "all") \
     .dtype_format(DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ,
                   DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ,
-                  DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ) \
+                  DataType.F32_FracNZ, DataType.F16_FracNZ, DataType.F32_FracNZ) \
     .dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ,
                   DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ,
                   DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ) \
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index 07353cf4907..8ba8fc3de23 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -1344,7 +1344,6 @@ class DynamicGRUV2Grad(PrimitiveWithInfer):
         cell_clip (float): A float identifying the cell clip in the op. Default: -1.0.
         num_proj (int): An integer identifying the num proj in the op. Default: 0.
         time_major (bool): A bool identifying the time major in the op. Default: True.
-        bias_type (str): An string identifying the type of bias_type function in the op. Default to "double_bias".
         gate_order (str): An string identifying the gate order in weight and bias. Default: 'rzh.
            'zrh' is another option.
         reset_after (bool): An bool identifying whether to apply reset gate after matrix multiplication. Default: True.
@@ -1402,7 +1401,6 @@ class DynamicGRUV2Grad(PrimitiveWithInfer):
                  cell_clip=-1.0,
                  num_proj=0,
                  time_major=True,
-                 bias_type="double_bias",
                  gate_order="rzh",
                  reset_after=True):
         self.cell_depth = validator.check_value_type("cell_depth", cell_depth, [int], self.name)
@@ -1411,8 +1409,6 @@ class DynamicGRUV2Grad(PrimitiveWithInfer):
         self.num_proj = validator.check_non_negative_int(num_proj, "num_proj", self.name)
         self.time_major = validator.check_value_type("time_major", time_major, [bool], self.name)
         self.direction = validator.check_string(direction, ['UNIDIRECTIONAL'], "direction", self.name)
-        self.bias_type = validator.check_string(bias_type,
-                                                ['no_bias', 'single_bias', 'double_bias'], "bias_type", self.name)
         self.gate_order = validator.check_string(gate_order, ['zrh', 'rzh'], "gate_order", self.name)
        self.reset_after = validator.check_value_type("reset_after", reset_after, [bool], self.name)
         self.add_prim_attr("io_format", "ND")