diff --git a/graphengine b/graphengine
index 21d3700f661..092c7a1f654 160000
--- a/graphengine
+++ b/graphengine
@@ -1 +1 @@
-Subproject commit 21d3700f661576edc37607a3bc961874ee5189a7
+Subproject commit 092c7a1f6548cac7d40e677af3498c3c49ea2bfd
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 35336e975b8..70ef9a54070 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -1071,7 +1071,7 @@ bool ExecutorPy::AddDFGraph(const py::dict& init_params, const std::string& phas
   }
   std::string init_graph = "init_subgraph." + net_id;
   std::string checkpoint_name = "save." + net_id;
-  if (phase == "train") {
+  if (phase.find("train") != std::string::npos) {
     (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}});
   } else {
     (void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph());
diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
index 74b0695cff8..87bfc8f6d86 100755
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -171,6 +171,7 @@ const char kNameAbsGrad[] = "AbsGrad";
 const char kNameBinaryCrossEntropy[] = "BinaryCrossEntropy";
 const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad";
 const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad";
+const char kNameSparseApplyFtrlD[] = "SparseApplyFtrlD";
 const char kNameSpaceToDepth[] = "SpaceToDepth";
 const char kNameDepthToSpace[] = "DepthToSpace";
 const char kNameSign[] = "Sign";
@@ -189,7 +190,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {string(kNameApplyMomentum), ADPT_DESC(ApplyMomentum)},
     {string(kNameMaxPool), ADPT_DESC(MaxPool)},
     {string(kNameAvgPool), ADPT_DESC(AvgPool)},
-    {string(kNameTopK), ADPT_DESC(TopKV2)},
+    {string(kNameTopK), ADPT_DESC(TopK)},
     {string(kNamePack), ADPT_DESC(Pack)},
     {string(kNameSplitD), ADPT_DESC(SplitD)},
     {string(kNameAllReduce), ADPT_DESC(HcomAllReduce)},
@@ -310,7 +311,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {prim::kPrimMinimum->name(), ADPT_DESC(Minimum)},
     {prim::kPrimSelect->name(), ADPT_DESC(Select)},
     {string(kNameLessEqual), ADPT_DESC(LessEqual)},
-    {prim::kPrimLogSoftmax->name(), ADPT_DESC(LogSoftmax)},
+    {prim::kPrimLogSoftmax->name(), ADPT_DESC(LogSoftmaxV2)},
     {string(kNameTruncatedNormal), ADPT_DESC(TruncatedNormal)},
     {string(kNameStridedSliceGrad), ADPT_DESC(StridedSliceGrad)},
     {prim::kPrimGelu->name(), ADPT_DESC(Gelu)},
@@ -343,7 +344,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {prim::kPrimMatMul->name(), ADPT_DESC(MatMul)},
 
     {string(kNameConst), ADPT_DESC(Constant, Const)},
-    {string(kNameSoftmax), ADPT_DESC(Softmax)},
+    {string(kNameSoftmax), ADPT_DESC(SoftmaxV2)},
     {string(kNameSoftmaxGrad), ADPT_DESC(SoftmaxGrad)},
     {string(kNameParam), ADPT_DESC(Data)},
     {string(kNameROIAlign), ADPT_DESC(ROIAlign)},
@@ -353,6 +354,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {string(kNameBinaryCrossEntropy), ADPT_DESC(BinaryCrossEntropy)},
     {string(kNameBinaryCrossEntropyGrad), ADPT_DESC(BinaryCrossEntropyGrad)},
     {string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)},
+    {string(kNameSparseApplyFtrlD), ADPT_DESC(SparseApplyFtrlD)},
     {string(kNameSpaceToDepth), ADPT_DESC(SpaceToDepth)},
     {string(kNameDepthToSpace), ADPT_DESC(DepthToSpace)},
     {string(kNameSign), ADPT_DESC(Sign)},
@@ -1017,8 +1019,8 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() {
     }
   }
 
-  // set up dependices
-  MS_LOG(DEBUG) << "set up dependices";
+  // set up dependencies
+  MS_LOG(DEBUG) << "set up dependencies";
   std::vector<AnfNodePtr> nodes = ::mindspore::TopoSort(anf_graph_->get_return());
   for (auto &it : nodes) {
     SetNodeInput(it);
@@ -1115,8 +1117,8 @@ void DfGraphConvertor::UpdateDataOpDesc(const AnfNodePtr &it, const OperatorPtr
   if (desc == nullptr) {
     MS_LOG(ERROR) << "Update data op descriptor failed! TensorDesc is null.";
   } else {
-    (void)std::static_pointer_cast<Data>(op)->update_input_desc_data(*desc);
-    (void)std::static_pointer_cast<Data>(op)->update_output_desc_out(*desc);
+    (void)std::static_pointer_cast<Data>(op)->update_input_desc_x(*desc);
+    (void)std::static_pointer_cast<Data>(op)->update_output_desc_y(*desc);
   }
 }
diff --git a/mindspore/ccsrc/transform/graph_runner.cc b/mindspore/ccsrc/transform/graph_runner.cc
index e77b1bcd736..2bff1a740c6 100644
--- a/mindspore/ccsrc/transform/graph_runner.cc
+++ b/mindspore/ccsrc/transform/graph_runner.cc
@@ -135,6 +135,13 @@ Status GraphRunner::RunGraph(const RunOptions& options, const std::vector<GeTensorPtr>& inputs,
+  if (sess_->IsGraphNeedRebuild(wrap_ptr->id_)) {
+    sess_->RemoveGraph(wrap_ptr->id_);
+    sess_->AddGraph(wrap_ptr->id_, *(wrap_ptr->graph_ptr_), wrap_ptr->options_);
+  }
+
   ge::Status ret = sess_->RunGraph(wrap_ptr->id_, ge_inputs, ge_outputs);
   if (ret != ge::GRAPH_SUCCESS) {
     MS_LOG(ERROR) << "Call GE RunGraph Failed, ret is: " << ret;
diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc
index 78b949c5255..07c5e9f5fec 100755
--- a/mindspore/ccsrc/transform/op_declare.cc
+++ b/mindspore/ccsrc/transform/op_declare.cc
@@ -138,11 +138,10 @@ OUTPUT_MAP(ApplyMomentum) = {{0, OUTPUT_DESC(var)}};
 INPUT_MAP(Summary) = {{2, INPUT_DESC(x)}};
 ATTR_MAP(Summary) = EMPTY_ATTR_MAP;
 
-// data
+// Data
 INPUT_MAP(Data) = EMPTY_INPUT_MAP;
 ATTR_MAP(Data) = EMPTY_ATTR_MAP;
 
-// resnet ops in ge
 // BatchNorm
 INPUT_MAP(BatchNorm) = {{1, INPUT_DESC(x)},
                         {2, INPUT_DESC(scale)},
@@ -194,9 +193,9 @@ OUTPUT_MAP(PRelu) = {{0, OUTPUT_DESC(y)}};
 
 // PReluGrad
 INPUT_MAP(PReluGrad) = {
-  {1, INPUT_DESC(input_gradients)}, {2, INPUT_DESC(input_features)}, {3, INPUT_DESC(input_weights)}};
+  {1, INPUT_DESC(grads)}, {2, INPUT_DESC(features)}, {3, INPUT_DESC(weights)}};
 ATTR_MAP(PReluGrad) = EMPTY_ATTR_MAP;
-OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(output_backprops_dx)}, {1, OUTPUT_DESC(output_backprops_da)}};
+OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(da)}};
 
 // Sigmoid
 INPUT_MAP(Sigmoid) = {{1, INPUT_DESC(x)}};
@@ -241,12 +240,12 @@ ATTR_MAP(CumsumD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())},
                      {"reverse", ATTR_DESC(reverse, AnyTraits())}};
 OUTPUT_MAP(CumsumD) = {{0, OUTPUT_DESC(y)}};
 
-// softmax
-INPUT_MAP(Softmax) = {{1, INPUT_DESC(x)}};
-ATTR_MAP(Softmax) = {
-  {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())},
+// SoftmaxV2
+INPUT_MAP(SoftmaxV2) = {{1, INPUT_DESC(x)}};
+ATTR_MAP(SoftmaxV2) = {
+  {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())},
 };
-OUTPUT_MAP(Softmax) = {{0, OUTPUT_DESC(y)}};
+OUTPUT_MAP(SoftmaxV2) = {{0, OUTPUT_DESC(y)}};
 
 // SoftmaxGrad
 INPUT_MAP(SoftmaxGrad) = {{1, INPUT_DESC(softmax)}, {2, INPUT_DESC(grad_softmax)}};
@@ -269,21 +268,21 @@ ATTR_MAP(GatherV2) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(GatherV2) = {{0, OUTPUT_DESC(y)}};
 
 // ReduceSum
-INPUT_MAP(ReduceSum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}};
+INPUT_MAP(ReduceSum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axes)}};
 ATTR_MAP(ReduceSum) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceSum) = {{0, OUTPUT_DESC(y)}};
 
 // ReduceSumD
 INPUT_MAP(ReduceSumD) = {{1, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ReduceSumD) = {
-  {2, ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
+  {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ReduceSumD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceSumD) = {{0, OUTPUT_DESC(y)}};
 
 // ReduceProdD
 INPUT_MAP(ReduceProdD) = {{1, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ReduceProdD) = {
-  {2, ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
+  {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ReduceProdD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceProdD) = {{0, OUTPUT_DESC(y)}};
@@ -294,7 +293,7 @@ ATTR_MAP(CumprodD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits())},
                      {"reverse", ATTR_DESC(reverse, AnyTraits())}};
 OUTPUT_MAP(CumprodD) = {{0, OUTPUT_DESC(y)}};
 
-// SoftmaxCrossEntropyWithLogits/
+// SoftmaxCrossEntropyWithLogits
 INPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(labels)}};
 ATTR_MAP(SoftmaxCrossEntropyWithLogits) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(backprop)}};
@@ -306,7 +305,7 @@ INPUT_ATTR_MAP(MeanGrad) = {{2, ATTR_DESC(mean_grad_output_shape_value, kOpForma
 ATTR_MAP(MeanGrad) = {{"mode", ATTR_DESC(mode, AnyTraits())}};
 
 INPUT_MAP(SliceD) = {{1, INPUT_DESC(x)}};
-INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(begin, AnyTraits(), AnyTraits>())},
+INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(offsets, AnyTraits(), AnyTraits>())},
                           {3, ATTR_DESC(size, AnyTraits(), AnyTraits>())}};
 ATTR_MAP(SliceD) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(SliceD) = {{0, OUTPUT_DESC(y)}};
@@ -401,42 +400,10 @@ ATTR_MAP(BoundingBoxDecode) = {
 };
 OUTPUT_MAP(BoundingBoxDecode) = {{0, OUTPUT_DESC(bboxes)}};
 
-#ifdef VALID_CODE
-
-// Less
-INPUT_MAP(Less) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
-ATTR_MAP(Less) = EMPTY_ATTR_MAP;
-OUTPUT_MAP(Less) = {{0, OUTPUT_DESC(z)}};
-
-// Cast
-INPUT_MAP(Cast) = {{1, INPUT_DESC(x)}};
-INPUT_ATTR_MAP(Cast) = {{2, ATTR_DESC(dst_type, AnyTraits())}};
-ATTR_MAP(Cast) = {{"Truncate", ATTR_DESC(truncate, AnyTraits())}};
-OUTPUT_MAP(Cast) = {{0, OUTPUT_DESC(y)}};
-
-// Minimum
-INPUT_MAP(Minimum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
-ATTR_MAP(Minimum) = {{"alpha", ATTR_DESC(alpha, AnyTraits())}, {"beta", ATTR_DESC(beta, AnyTraits())}};
-OUTPUT_MAP(Minimum) = {{0, OUTPUT_DESC(z)}};
-
-// Sub
-INPUT_MAP(Sub) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
-ATTR_MAP(Sub) = {{"alpha", ATTR_DESC(alpha, AnyTraits())}, {"beta", ATTR_DESC(beta, AnyTraits())}};
-
-#endif
-
-// TopKV2
-INPUT_MAP(TopKV2) = {
-  {1, INPUT_DESC(input)},
-  {2, INPUT_DESC(k)},
-};
-
-ATTR_MAP(TopKV2) = {{"T", ATTR_DESC(T, AnyTraits())}, {"sorted", ATTR_DESC(sorted, AnyTraits())}};
-
-OUTPUT_MAP(TopKV2) = {
-  {0, OUTPUT_DESC(values)},
-  {1, OUTPUT_DESC(indices)},
-};
+// TopK
+INPUT_MAP(TopK) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(k)}};
+ATTR_MAP(TopK) = {{"sorted", ATTR_DESC(sorted, AnyTraits())}};
+OUTPUT_MAP(TopK) = {{0, OUTPUT_DESC(values)}, {1, OUTPUT_DESC(indices)}};
 
 // Multiply
 INPUT_MAP(Multiply) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
@@ -476,7 +443,7 @@ ATTR_MAP(Iou) = {{"mode", ATTR_DESC(mode, AnyTraits())}};
 OUTPUT_MAP(Iou) = {{0, OUTPUT_DESC(overlap)}};
 
 // ResizeNearestNeighborD
-INPUT_MAP(ResizeNearestNeighborD) = {{1, INPUT_DESC(images)}};
+INPUT_MAP(ResizeNearestNeighborD) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(ResizeNearestNeighborD) = {
   {"size", ATTR_DESC(size, AnyTraits>(), AnyTraits>())},
   {"align_corners", ATTR_DESC(align_corners, AnyTraits())}};
@@ -506,17 +473,17 @@ ATTR_MAP(Relu6) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(Relu6) = {{0, OUTPUT_DESC(activations)}};
 
 // Relu6Grad
-INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(y)}};
+INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(gradients)}};
 ATTR_MAP(Relu6Grad) = EMPTY_ATTR_MAP;
-OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(z)}};
+OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(backprops)}};
 
 // ResizeBilinearGrad
 INPUT_MAP(ResizeBilinearGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(original_image)}};
 ATTR_MAP(ResizeBilinearGrad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits())}};
 OUTPUT_MAP(ResizeBilinearGrad) = {{0, OUTPUT_DESC(y)}};
 
-// ResizeBilinear
-INPUT_MAP(ResizeBilinearD) = {{1, INPUT_DESC(images)}};
+// ResizeBilinearD
+INPUT_MAP(ResizeBilinearD) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(ResizeBilinearD) = {
   {"size", ATTR_DESC(size, AnyTraits>(), AnyTraits>())},
   {"align_corners", ATTR_DESC(align_corners, AnyTraits())}};
@@ -539,9 +506,9 @@ OUTPUT_MAP(NMSWithMask) = {
   {0, OUTPUT_DESC(selected_boxes)}, {1, OUTPUT_DESC(selected_idx)}, {2, OUTPUT_DESC(selected_mask)}};
 
 // Unpack
-INPUT_MAP(Unpack) = {{1, INPUT_DESC(value)}};
+INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits())}, {"num", ATTR_DESC(num, AnyTraits())}};
-DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(output)}};
+DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}};
 
 // ScatterNdUpdate
 INPUT_MAP(ScatterNdUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}};
@@ -574,8 +541,8 @@ INPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {
 ATTR_MAP(SigmoidCrossEntropyWithLogitsGrad) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {{0, OUTPUT_DESC(gradient)}};
 
-// ScatterNd
-INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(updates)}};
+// ScatterNdD
+INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ScatterNdD) = {
   {3, ATTR_DESC(shape, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ScatterNdD) = EMPTY_ATTR_MAP;
@@ -587,7 +554,7 @@ ATTR_MAP(PadD) = {{"paddings", ATTR_DESC(paddings, AnyTraits())},
-                  {"output_type", ATTR_DESC(output_type, AnyTraits())}};
+                  {"output_type", ATTR_DESC(dtype, AnyTraits())}};
 OUTPUT_MAP(ArgMaxD) = {{0, OUTPUT_DESC(y)}};
 
 // ArgMinD
 INPUT_MAP(ArgMinD) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(ArgMinD) = {{"axis", ATTR_DESC(dimension, AnyTraits())},
-                     {"output_type", ATTR_DESC(output_type, AnyTraits())}};
+                     {"output_type", ATTR_DESC(dtype, AnyTraits())}};
 OUTPUT_MAP(ArgMinD) = {{0, OUTPUT_DESC(y)}};
 
 // ArgMaxWithValue
@@ -634,14 +601,14 @@ ATTR_MAP(ArgMinWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits())},
 OUTPUT_MAP(ArgMinWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}};
 
 // ReduceAll
-INPUT_MAP(ReduceAll) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}};
+INPUT_MAP(ReduceAll) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axes)}};
 ATTR_MAP(ReduceAll) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceAll) = {{0, OUTPUT_DESC(y)}};
 
 // ReduceMeanD
 INPUT_MAP(ReduceMeanD) = {{1, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ReduceMeanD) = {
-  {2, ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
+  {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ReduceMeanD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceMeanD) = {{0, OUTPUT_DESC(y)}};
@@ -708,11 +675,12 @@ INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits())}};
 OUTPUT_MAP(BiasAddGrad) = {{0, OUTPUT_DESC(y)}};
 
-// maxpoolgrad
+// MaxPoolGrad
 INPUT_MAP(MaxPoolGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grad)}};
 ATTR_MAP(MaxPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits(), AnyTraits>())},
                          {"strides", ATTR_DESC(strides, AnyTraits(), AnyTraits>())},
-                         {"padding", ATTR_DESC(padding, AnyTraits())}};
+                         {"padding", ATTR_DESC(padding, AnyTraits())},
+                         {"data_format", ATTR_DESC(data_format, AnyTraits())}};
 OUTPUT_MAP(MaxPoolGrad) = {{0, OUTPUT_DESC(y)}};
 
 // avgpoolgrad
@@ -739,28 +707,34 @@ ATTR_MAP(Conv2D) = {
   {"stride", ATTR_DESC(strides, "pad", AnyTraits>())},
   {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())},
   {"dilation", ATTR_DESC(dilations, "pad", AnyTraits>())},
+  {"data_format", ATTR_DESC(data_format, AnyTraits())},
+  {"group", ATTR_DESC(groups, AnyTraits())}
 };
 OUTPUT_MAP(Conv2D) = {{0, OUTPUT_DESC(y)}};
 
 // Conv2DBackpropInputD
-INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filters)}};
+INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filter)}};
 INPUT_ATTR_MAP(Conv2DBackpropInputD) = {
-  {3, ATTR_DESC(input_sizes, AnyTraits>(), AnyTraits>())}};
+  {3, ATTR_DESC(input_size, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(Conv2DBackpropInputD) = {
   {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())},
-  {"stride", ATTR_DESC(strides, "strides", AnyTraits>())},
+  {"stride", ATTR_DESC(strides, "pad", AnyTraits>())},
   {"dilation", ATTR_DESC(dilations, "pad", AnyTraits>())},
+  {"data_format", ATTR_DESC(data_format, AnyTraits())},
+  {"group", ATTR_DESC(groups, AnyTraits())}
 };
 OUTPUT_MAP(Conv2DBackpropInputD) = {{0, OUTPUT_DESC(y)}};
 
 // Conv2DBackpropFilterD
 INPUT_MAP(Conv2DBackpropFilterD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(Conv2DBackpropFilterD) = {
-  {3, ATTR_DESC(filter_sizes, AnyTraits>(), AnyTraits>())}};
+  {3, ATTR_DESC(filter_size, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(Conv2DBackpropFilterD) = {
   {"pad_list", ATTR_DESC(pads, AnyTraits>(), AnyTraits>())},
-  {"stride", ATTR_DESC(strides, "strides", AnyTraits>())},
+  {"stride", ATTR_DESC(strides, "pad", AnyTraits>())},
   {"dilation", ATTR_DESC(dilations, "pad", AnyTraits>())},
+  {"data_format", ATTR_DESC(data_format, AnyTraits())},
+  {"group", ATTR_DESC(groups, AnyTraits())}
 };
 OUTPUT_MAP(Conv2DBackpropFilterD) = {{0, OUTPUT_DESC(y)}};
@@ -798,8 +772,8 @@ OUTPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{0, OUTPUT_DESC(filter_grad)}};
 
 // MatMul
 INPUT_MAP(MatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
-ATTR_MAP(MatMul) = {{"transpose_a", ATTR_DESC(transpose_a, AnyTraits())},
-                    {"transpose_b", ATTR_DESC(transpose_b, AnyTraits())}};
+ATTR_MAP(MatMul) = {{"transpose_a", ATTR_DESC(transpose_x1, AnyTraits())},
+                    {"transpose_b", ATTR_DESC(transpose_x2, AnyTraits())}};
 OUTPUT_MAP(MatMul) = {{0, OUTPUT_DESC(y)}};
 
 // Merge
@@ -846,10 +820,10 @@ ATTR_MAP(Sub) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(Sub) = {{0, OUTPUT_DESC(y)}};
 
 // SplitD
-INPUT_MAP(SplitD) = {{1, INPUT_DESC(value)}};
+INPUT_MAP(SplitD) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(SplitD) = {{"axis", ATTR_DESC(split_dim, AnyTraits())},
                     {"output_num", ATTR_DESC(num_split, AnyTraits())}};
-DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(output)}};
+DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(y)}};
 
 // Neg
 INPUT_MAP(Neg) = {{1, INPUT_DESC(x)}};
@@ -876,12 +850,12 @@ OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}};
 
 // ConcatD
 INPUT_MAP(ConcatD) = EMPTY_INPUT_MAP;
-DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(input_values)}};
+DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(x)}};
 ATTR_MAP(ConcatD) = {
   {"axis", ATTR_DESC(concat_dim, AnyTraits())},
   {"inputNums", ATTR_DESC(N, AnyTraits())},
 };
-OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(output_data)}};
+OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(y)}};
 
 // Less
 INPUT_MAP(Less) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
@@ -916,14 +890,14 @@ OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}};
 
 // ReduceMinD
 INPUT_MAP(ReduceMinD) = {{1, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ReduceMinD) = {
-  {2, ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
+  {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ReduceMinD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceMinD) = {{0, OUTPUT_DESC(y)}};
 
 // ReduceMaxD
 INPUT_MAP(ReduceMaxD) = {{1, INPUT_DESC(x)}};
 INPUT_ATTR_MAP(ReduceMaxD) = {
-  {2, ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
+  {2, ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
 ATTR_MAP(ReduceMaxD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits())}};
 OUTPUT_MAP(ReduceMaxD) = {{0, OUTPUT_DESC(y)}};
@@ -1008,11 +982,11 @@ INPUT_MAP(LessEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
 ATTR_MAP(LessEqual) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(LessEqual) = {{0, OUTPUT_DESC(y)}};
 
-// LogSoftmax
-INPUT_MAP(LogSoftmax) = {{1, INPUT_DESC(logits)}};
-ATTR_MAP(LogSoftmax) = {
-  {"axis", ATTR_DESC(axis, AnyTraits>(), AnyTraits>())}};
-OUTPUT_MAP(LogSoftmax) = {{0, OUTPUT_DESC(logsoftmax)}};
+// LogSoftmaxV2
+INPUT_MAP(LogSoftmaxV2) = {{1, INPUT_DESC(logits)}};
+ATTR_MAP(LogSoftmaxV2) = {
+  {"axis", ATTR_DESC(axes, AnyTraits>(), AnyTraits>())}};
+OUTPUT_MAP(LogSoftmaxV2) = {{0, OUTPUT_DESC(logsoftmax)}};
 
 // RandomChoiceWithMask
 INPUT_MAP(RandomChoiceWithMask) = {{1, INPUT_DESC(x)}};
@@ -1094,8 +1068,8 @@ OUTPUT_MAP(LayerNormGrad) = {{0, OUTPUT_DESC(pd_x)}, {1, OUTPUT_DESC(pd_gamma)},
 
 // BatchMatMul
 INPUT_MAP(BatchMatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
-ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x, AnyTraits())},
-                         {"transpose_x2", ATTR_DESC(adj_y, AnyTraits())}};
+ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x1, AnyTraits())},
+                         {"transpose_x2", ATTR_DESC(adj_x2, AnyTraits())}};
 OUTPUT_MAP(BatchMatMul) = {{0, OUTPUT_DESC(y)}};
 
 // DropoutDoMask
@@ -1146,6 +1120,19 @@ ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits())},
                                  {"use_locking", ATTR_DESC(use_locking, AnyTraits())}};
 OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}};
 
+// SparseApplyFtrlD
+INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)},
+                               {2, INPUT_DESC(accum)},
+                               {3, INPUT_DESC(linear)},
+                               {4, INPUT_DESC(grad)},
+                               {5, INPUT_DESC(indices)}};
+ATTR_MAP(SparseApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits())},
+                              {"lr", ATTR_DESC(lr, AnyTraits())},
+                              {"l1", ATTR_DESC(l1, AnyTraits())},
+                              {"l2", ATTR_DESC(l2, AnyTraits())},
+                              {"lr_power", ATTR_DESC(lr_power, AnyTraits())}};
+OUTPUT_MAP(SparseApplyFtrlD) = {{0, OUTPUT_DESC(var)}};
+
 // SpaceToDepth
 INPUT_MAP(SpaceToDepth) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(SpaceToDepth) = {{"block_size", ATTR_DESC(block_size, AnyTraits())}};
diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h
index 03463b978fe..9e4f407ebb0 100755
--- a/mindspore/ccsrc/transform/op_declare.h
+++ b/mindspore/ccsrc/transform/op_declare.h
@@ -209,8 +209,8 @@ DECLARE_OP_USE_OUTPUT(Merge)
 DECLARE_OP_ADAPTER(Switch)
 DECLARE_OP_USE_OUTPUT(Switch)
-DECLARE_OP_ADAPTER(TopKV2)
-DECLARE_OP_USE_OUTPUT(TopKV2)
+DECLARE_OP_ADAPTER(TopK)
+DECLARE_OP_USE_OUTPUT(TopK)
 DECLARE_OP_ADAPTER(RealDiv)
 DECLARE_OP_USE_OUTPUT(RealDiv)
@@ -260,8 +260,8 @@ DECLARE_OP_ADAPTER(Select)
 DECLARE_OP_USE_OUTPUT(Select)
 DECLARE_OP_ADAPTER(LessEqual)
 DECLARE_OP_USE_OUTPUT(LessEqual)
-DECLARE_OP_ADAPTER(LogSoftmax)
-DECLARE_OP_USE_OUTPUT(LogSoftmax)
+DECLARE_OP_ADAPTER(LogSoftmaxV2)
+DECLARE_OP_USE_OUTPUT(LogSoftmaxV2)
 DECLARE_OP_ADAPTER(TruncatedNormal)
 DECLARE_OP_USE_OUTPUT(TruncatedNormal)
 DECLARE_OP_ADAPTER(StridedSliceGrad)
@@ -391,8 +391,8 @@ DECLARE_OP_ADAPTER(Sigmoid)
 DECLARE_OP_USE_OUTPUT(Sigmoid)
 DECLARE_OP_ADAPTER(SigmoidGrad)
 DECLARE_OP_USE_OUTPUT(SigmoidGrad)
-DECLARE_OP_ADAPTER(Softmax)
-DECLARE_OP_USE_OUTPUT(Softmax)
+DECLARE_OP_ADAPTER(SoftmaxV2)
+DECLARE_OP_USE_OUTPUT(SoftmaxV2)
 DECLARE_OP_ADAPTER(SoftmaxGrad)
 DECLARE_OP_USE_OUTPUT(SoftmaxGrad)
 DECLARE_OP_ADAPTER(Greater)
@@ -435,6 +435,8 @@ DECLARE_OP_ADAPTER(Round)
 DECLARE_OP_USE_OUTPUT(Round)
 DECLARE_OP_ADAPTER(ApplyFtrl)
 DECLARE_OP_USE_OUTPUT(ApplyFtrl)
+DECLARE_OP_ADAPTER(SparseApplyFtrlD)
+DECLARE_OP_USE_OUTPUT(SparseApplyFtrlD)
 #ifdef ENABLE_GE
 DECLARE_OP_ADAPTER(Print)
 DECLARE_OP_USE_DYN_INPUT(Print)
diff --git a/mindspore/ccsrc/transform/util.cc b/mindspore/ccsrc/transform/util.cc
index a106a20ad80..0a18763d120 100644
--- a/mindspore/ccsrc/transform/util.cc
+++ b/mindspore/ccsrc/transform/util.cc
@@ -361,12 +361,11 @@ MeTensorPtr TransformUtil::GenerateMeTensor(const GeTensorPtr& ge_tensor, const
     MS_LOG(ERROR) << "GE tensor data size is zero!";
     return nullptr;
   }
-  errno_t ret = memcpy_s(me_data_ptr, me_data_size, ge_tensor->GetData(), ge_tensor->GetSize());
-  if (ret != EOK) {
-    MS_LOG(INFO) << "GE tensor data size is " << ge_tensor->GetSize() << " bytes";
-    MS_LOG(ERROR) << "Copy GE tensor data to me tensor failed";
-    return nullptr;
-  }
+
+  // Use memcpy rather than memcpy_s, because the size of ge_tensor may exceed 2GB,
+  // which is the size limit of memcpy_s
+  memcpy(me_data_ptr, ge_tensor->GetData(), ge_tensor->GetSize());
+
   return make_shared<MeTensor>(me_tensor);
 }
diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc
index bf05af98582..e9b4586b217 100644
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@@ -355,7 +355,9 @@ void MsContext::GetGeOptions(std::map<std::string, std::string>* ge_options) const
     MS_LOG(ERROR) << "Set proto lib path failed!";
   }
 
-  // Disbale the global variable acc, only enable it whlie adding training graph in pipeline
+  // Enable auto mixed precision according to the context options
+  (*ge_options)["ge.exec.auto_mix_precision"] = std::to_string(auto_mixed_precision_flag_);
+  // Disable the global variable acc, only enable it while adding training graph in pipeline
   (*ge_options)["ge.exec.variable_acc"] = "0";
 #endif
 }
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index a75b078df80..77bb6d0ff32 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -65,7 +65,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                      SmoothL1Loss, Softmax,
                      SoftmaxCrossEntropyWithLogits, ROIAlign,
                      SparseSoftmaxCrossEntropyWithLogits, Tanh,
-                     TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl)
+                     TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrlD)
 from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
@@ -217,6 +217,7 @@ __all__ = [
     "Abs",
     "BinaryCrossEntropy",
     "SparseApplyAdagrad",
+    "SparseApplyFtrlD",
     "SpaceToDepth",
     "DepthToSpace",
     "Conv2DBackpropInput",
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index afa4c7dfe38..57e409b44f5 100755
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2141,6 +2141,79 @@ class SparseApplyAdagrad(PrimitiveWithInfer):
         return var_type
 
 
+class SparseApplyFtrlD(PrimitiveWithInfer):
+    r"""
+    Update relevant entries according to the FTRL-proximal optimization algorithm.
+
+    .. math::
+        \text{accum}_{new} = \text{accum} + \text{grad} * \text{grad}
+
+    .. math::
+        \text{linear} += \text{grad} - (\text{accum}_{new}^{-\text{lr_power}} -
+        \text{accum}^{-\text{lr_power}}) / \text{lr} * \text{var}
+
+    .. math::
+        \text{quadratic} = 1.0 / (\text{accum}_{new}^{\text{lr_power}} * \text{lr}) + 2 * \text{l2}
+
+    .. math::
+        \text{var} = (\text{sign}(\text{linear}) * \text{l1} - \text{linear}) / \text{quadratic}
+        \ \text{if} \ \vert \text{linear} \vert > \text{l1} \ \text{else} \ 0.0
+
+    Args:
+        lr (float): Learning rate.
+        l1 (float): L1 regularization strength.
+        l2 (float): L2 regularization strength.
+        lr_power (float): Learning rate power, controls how the learning rate decreases during training.
+        use_locking (bool): If True, updating the var and accum tensors will be protected. Default: False.
+
+    Inputs:
+        - **var** (Tensor) - Variable to be updated. The type must be float32.
+        - **accum** (Tensor) - Accumulator to be updated. The shape must be the same as `var`'s shape,
+          the type must be float32.
+        - **linear** (Tensor) - Linear coefficient to be updated. The shape must be the same as `var`'s shape,
+          the type must be float32.
+        - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape,
+          the type must be float32.
+        - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
+          The shape of `indices` must be the same as `grad` in the first dimension, the type must be int32.
+
+    Outputs:
+        Tensor, has the same shape and type as `var`.
+
+    """
+
+    @prim_attr_register
+    def __init__(self, lr, l1, l2, lr_power, use_locking=False):
+        """init SparseApplyFtrlD"""
+        self.lr = validator.check_type("lr", lr, [float])
+        self.l1 = validator.check_type("l1", l1, [float])
+        self.l2 = validator.check_type("l2", l2, [float])
+        self.lr_power = validator.check_type("lr_power", lr_power, [float])
+        self.use_locking = validator.check_type("use_locking", use_locking, [bool])
+
+    def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
+        validator.check_param_equal('var shape', var_shape, 'accum shape', accum_shape)
+        validator.check_param_equal('len of var shape', len(var_shape), 'len of grad shape', len(grad_shape))
+        validator.check_param_equal('len of var shape', len(var_shape), 'len of linear shape', len(linear_shape))
+        if len(var_shape) > 1:
+            validator.check_param_equal('var_shape', var_shape[1:], 'grad_shape', grad_shape[1:])
+            validator.check_param_equal('var_shape', var_shape[1:], 'linear_shape', linear_shape[1:])
+        validator.check_integer("len of indices shape", len(indices_shape), 1, Rel.EQ)
+        validator.check('the first dimension of grad', grad_shape[0],
+                        'the shape of indices', indices_shape[0], Rel.EQ)
+
+        return var_shape
+
+    def infer_dtype(self, var_type, accum_type, linear_type, grad_type, indices_type):
+        validator.check_subclass("var_type", var_type, mstype.tensor)
+        validator.check_subclass("accum_type", accum_type, mstype.tensor)
+        validator.check_subclass("linear_type", linear_type, mstype.tensor)
+        validator.check_subclass("grad_type", grad_type, mstype.tensor)
+        validator.check_subclass("indices_type", indices_type, mstype.tensor)
+
+        return var_type
+
+
 class LARSUpdate(PrimitiveWithInfer):
     """
     Conduct lars (layer-wise adaptive rate scaling) update on the square sum of gradient.
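For reference, the FTRL-proximal step documented in the docstring above can be sketched in plain NumPy. This is an illustrative sketch only: the name sparse_apply_ftrl_reference is hypothetical, it follows the documented formulas rather than the actual Ascend kernel, and duplicate indices are applied sequentially, which the hardware op may not guarantee.

import numpy as np

def sparse_apply_ftrl_reference(var, accum, linear, grad, indices, lr, l1, l2, lr_power):
    # Hypothetical NumPy sketch of the FTRL-proximal update for the rows in `indices`.
    for row, g in zip(indices, grad):
        accum_new = accum[row] + g * g
        # sigma = (accum_new^(-lr_power) - accum^(-lr_power)) / lr
        sigma = (accum_new ** (-lr_power) - accum[row] ** (-lr_power)) / lr
        linear[row] += g - sigma * var[row]
        quadratic = 1.0 / (accum_new ** lr_power * lr) + 2.0 * l2
        # Rows whose |linear| <= l1 are clipped to zero by the proximal step
        var[row] = np.where(np.abs(linear[row]) > l1,
                            (np.sign(linear[row]) * l1 - linear[row]) / quadratic,
                            0.0)
        accum[row] = accum_new
    return var, accum, linear

# Example mirroring the shapes used in the new unit test below
var = np.ones((3, 3), np.float32)
accum = np.ones((3, 3), np.float32)
linear = np.ones((3, 3), np.float32)
grad = np.ones((3, 3), np.float32)
indices = 2 * np.ones((3,), np.int32)  # every update targets row 2
sparse_apply_ftrl_reference(var, accum, linear, grad, indices, 0.1, 0.1, 0.1, -0.1)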
@@ -2244,4 +2317,4 @@ class ApplyFtrl(PrimitiveWithInfer):
         validator.check_typename("l1", l1_type,[mstype.float16, mstype.float32])
         validator.check_typename("l2", l2_type,[mstype.float16, mstype.float32])
         validator.check_typename("lr_power", lr_power_type,[mstype.float16, mstype.float32])
-        return var_type
\ No newline at end of file
+        return var_type
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index bfe80759728..8d7dd950723 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -749,6 +749,11 @@ test_case_nn_ops = [
         'desc_inputs': [[3, 3], [3, 3], [3, 3], Tensor(np.ones((3,), np.int32))],
         'desc_bprop': [3, 3],
         'skip': ['backward']}),
+    ('SparseApplyFtrlD', {
+        'block': P.SparseApplyFtrlD(0.1, 0.1, 0.1, -0.1),
+        'desc_inputs': [[3, 3], [3, 3], [3, 3], [3, 3], Tensor(2*np.ones((3,), np.int32))],
+        'desc_bprop': [3, 3],
+        'skip': ['backward']}),
    ('Flatten_1', {
        'block': NetForFlatten(),
        'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],
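From user code the new primitive behaves like any other operation in mindspore.ops.operations. A minimal usage sketch mirroring the shapes in the unit test above — the wrapper name SparseFtrlNet is hypothetical, and running it end to end assumes an Ascend/GE build in which the SparseApplyFtrlD adapter added earlier in this patch is registered:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class SparseFtrlNet(nn.Cell):
    """Hypothetical Cell wrapping the new SparseApplyFtrlD primitive."""
    def __init__(self):
        super(SparseFtrlNet, self).__init__()
        self.sparse_apply_ftrl = P.SparseApplyFtrlD(lr=0.1, l1=0.1, l2=0.1, lr_power=-0.1)
        self.var = Parameter(Tensor(np.ones((3, 3), np.float32)), name="var")
        self.accum = Parameter(Tensor(np.ones((3, 3), np.float32)), name="accum")
        self.linear = Parameter(Tensor(np.ones((3, 3), np.float32)), name="linear")

    def construct(self, grad, indices):
        # var, accum and linear are updated in place; the op returns the new var
        return self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)

net = SparseFtrlNet()
grad = Tensor(np.ones((3, 3), np.float32))
indices = Tensor(2 * np.ones((3,), np.int32))
out = net(grad, indices)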