forked from mindspore-Ecosystem/mindspore

commit 6f2b7abe04 (parent f338eb3a60)
modify reduceminD and reducemaxD IR
@@ -1 +1 @@
Subproject commit 21d3700f661576edc37607a3bc961874ee5189a7
Subproject commit 092c7a1f6548cac7d40e677af3498c3c49ea2bfd

@@ -1071,7 +1071,7 @@ bool ExecutorPy::AddDFGraph(const py::dict& init_params, const std::string& phas
}
std::string init_graph = "init_subgraph." + net_id;
std::string checkpoint_name = "save." + net_id;
if (phase == "train") {
if (phase.find("train") != std::string::npos) {
(void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph(), {{"ge.exec.variable_acc", "1"}});
} else {
(void)DfGraphManager::GetInstance().AddGraph(phase, convertor.GetComputeGraph());

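The hunk above widens the training-phase check from exact equality to a substring match. A minimal standalone sketch of the difference, assuming phase names carry a suffix such as "train.<net_id>" (the exact naming convention is an assumption, not something this diff states):

#include <iostream>
#include <string>

// Hypothetical helper mirroring the new check: any phase whose name contains
// "train" is treated as a training graph.
bool IsTrainPhase(const std::string &phase) { return phase.find("train") != std::string::npos; }

int main() {
  // "train.net_1" is an assumed example phase name, not taken from the diff.
  std::string phase = "train.net_1";
  std::cout << (phase == "train") << std::endl;   // 0: exact comparison fails
  std::cout << IsTrainPhase(phase) << std::endl;  // 1: substring match succeeds
  return 0;
}
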
@@ -171,6 +171,7 @@ const char kNameAbsGrad[] = "AbsGrad";
const char kNameBinaryCrossEntropy[] = "BinaryCrossEntropy";
const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad";
const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad";
const char kNameSparseApplyFtrlD[] = "SparseApplyFtrlD";
const char kNameSpaceToDepth[] = "SpaceToDepth";
const char kNameDepthToSpace[] = "DepthToSpace";
const char kNameSign[] = "Sign";

@@ -189,7 +190,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
{string(kNameApplyMomentum), ADPT_DESC(ApplyMomentum)},
{string(kNameMaxPool), ADPT_DESC(MaxPool)},
{string(kNameAvgPool), ADPT_DESC(AvgPool)},
{string(kNameTopK), ADPT_DESC(TopKV2)},
{string(kNameTopK), ADPT_DESC(TopK)},
{string(kNamePack), ADPT_DESC(Pack)},
{string(kNameSplitD), ADPT_DESC(SplitD)},
{string(kNameAllReduce), ADPT_DESC(HcomAllReduce)},

@@ -310,7 +311,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
{prim::kPrimMinimum->name(), ADPT_DESC(Minimum)},
{prim::kPrimSelect->name(), ADPT_DESC(Select)},
{string(kNameLessEqual), ADPT_DESC(LessEqual)},
{prim::kPrimLogSoftmax->name(), ADPT_DESC(LogSoftmax)},
{prim::kPrimLogSoftmax->name(), ADPT_DESC(LogSoftmaxV2)},
{string(kNameTruncatedNormal), ADPT_DESC(TruncatedNormal)},
{string(kNameStridedSliceGrad), ADPT_DESC(StridedSliceGrad)},
{prim::kPrimGelu->name(), ADPT_DESC(Gelu)},

@@ -343,7 +344,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
{prim::kPrimMatMul->name(), ADPT_DESC(MatMul)},

{string(kNameConst), ADPT_DESC(Constant, Const)},
{string(kNameSoftmax), ADPT_DESC(Softmax)},
{string(kNameSoftmax), ADPT_DESC(SoftmaxV2)},
{string(kNameSoftmaxGrad), ADPT_DESC(SoftmaxGrad)},
{string(kNameParam), ADPT_DESC(Data)},
{string(kNameROIAlign), ADPT_DESC(ROIAlign)},

@@ -353,6 +354,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
{string(kNameBinaryCrossEntropy), ADPT_DESC(BinaryCrossEntropy)},
{string(kNameBinaryCrossEntropyGrad), ADPT_DESC(BinaryCrossEntropyGrad)},
{string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)},
{string(kNameSparseApplyFtrlD), ADPT_DESC(SparseApplyFtrlD)},
{string(kNameSpaceToDepth), ADPT_DESC(SpaceToDepth)},
{string(kNameDepthToSpace), ADPT_DESC(DepthToSpace)},
{string(kNameSign), ADPT_DESC(Sign)},

@@ -1017,8 +1019,8 @@ DfGraphConvertor &DfGraphConvertor::BuildGraph() {
}
}

// set up dependices
MS_LOG(DEBUG) << "set up dependices";
// set up dependencies
MS_LOG(DEBUG) << "set up dependencies";
std::vector<AnfNodePtr> nodes = ::mindspore::TopoSort(anf_graph_->get_return());
for (auto &it : nodes) {
SetNodeInput(it);

@@ -1115,8 +1117,8 @@ void DfGraphConvertor::UpdateDataOpDesc(const AnfNodePtr &it, const OperatorPtr
if (desc == nullptr) {
MS_LOG(ERROR) << "Update data op descriptor failed! TensorDesc is null.";
} else {
(void)std::static_pointer_cast<Data>(op)->update_input_desc_data(*desc);
(void)std::static_pointer_cast<Data>(op)->update_output_desc_out(*desc);
(void)std::static_pointer_cast<Data>(op)->update_input_desc_x(*desc);
(void)std::static_pointer_cast<Data>(op)->update_output_desc_y(*desc);
}
}

@@ -135,6 +135,13 @@ Status GraphRunner::RunGraph(const RunOptions& options, const std::vector<GeTens
return Status::FAILED;
}

// The information of some nodes could be changed after fusion in some cases
// Therefore a graph needs to be rebuilt in above situation
if (sess_->IsGraphNeedRebuild(wrap_ptr->id_)) {
sess_->RemoveGraph(wrap_ptr->id_);
sess_->AddGraph(wrap_ptr->id_, *(wrap_ptr->graph_ptr_), wrap_ptr->options_);
}

ge::Status ret = sess_->RunGraph(wrap_ptr->id_, ge_inputs, ge_outputs);
if (ret != ge::GRAPH_SUCCESS) {
MS_LOG(ERROR) << "Call GE RunGraph Failed, ret is: " << ret;

@@ -138,11 +138,10 @@ OUTPUT_MAP(ApplyMomentum) = {{0, OUTPUT_DESC(var)}};
INPUT_MAP(Summary) = {{2, INPUT_DESC(x)}};
ATTR_MAP(Summary) = EMPTY_ATTR_MAP;

// data
// Data
INPUT_MAP(Data) = EMPTY_INPUT_MAP;
ATTR_MAP(Data) = EMPTY_ATTR_MAP;

// resnet ops in ge
// BatchNorm
INPUT_MAP(BatchNorm) = {{1, INPUT_DESC(x)},
{2, INPUT_DESC(scale)},

@@ -194,9 +193,9 @@ OUTPUT_MAP(PRelu) = {{0, OUTPUT_DESC(y)}};

// PReluGrad
INPUT_MAP(PReluGrad) = {
{1, INPUT_DESC(input_gradients)}, {2, INPUT_DESC(input_features)}, {3, INPUT_DESC(input_weights)}};
{1, INPUT_DESC(grads)}, {2, INPUT_DESC(features)}, {3, INPUT_DESC(weights)}};
ATTR_MAP(PReluGrad) = EMPTY_ATTR_MAP;
OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(output_backprops_dx)}, {1, OUTPUT_DESC(output_backprops_da)}};
OUTPUT_MAP(PReluGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(da)}};

// Sigmoid
INPUT_MAP(Sigmoid) = {{1, INPUT_DESC(x)}};

@@ -241,12 +240,12 @@ ATTR_MAP(CumsumD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits<bool>())},
{"reverse", ATTR_DESC(reverse, AnyTraits<bool>())}};
OUTPUT_MAP(CumsumD) = {{0, OUTPUT_DESC(y)}};

// softmax
INPUT_MAP(Softmax) = {{1, INPUT_DESC(x)}};
ATTR_MAP(Softmax) = {
{"axis", ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
// SoftmaxV2
INPUT_MAP(SoftmaxV2) = {{1, INPUT_DESC(x)}};
ATTR_MAP(SoftmaxV2) = {
{"axis", ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
};
OUTPUT_MAP(Softmax) = {{0, OUTPUT_DESC(y)}};
OUTPUT_MAP(SoftmaxV2) = {{0, OUTPUT_DESC(y)}};

// SoftmaxGrad
INPUT_MAP(SoftmaxGrad) = {{1, INPUT_DESC(softmax)}, {2, INPUT_DESC(grad_softmax)}};

@@ -269,21 +268,21 @@ ATTR_MAP(GatherV2) = EMPTY_ATTR_MAP;
OUTPUT_MAP(GatherV2) = {{0, OUTPUT_DESC(y)}};

// ReduceSum
INPUT_MAP(ReduceSum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}};
INPUT_MAP(ReduceSum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axes)}};
ATTR_MAP(ReduceSum) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceSum) = {{0, OUTPUT_DESC(y)}};

// ReduceSumD
INPUT_MAP(ReduceSumD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ReduceSumD) = {
{2, ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{2, ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ReduceSumD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceSumD) = {{0, OUTPUT_DESC(y)}};

// ReduceProdD
INPUT_MAP(ReduceProdD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ReduceProdD) = {
{2, ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{2, ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ReduceProdD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceProdD) = {{0, OUTPUT_DESC(y)}};

@@ -294,7 +293,7 @@ ATTR_MAP(CumprodD) = {{"exclusive", ATTR_DESC(exclusive, AnyTraits<bool>())},
{"reverse", ATTR_DESC(reverse, AnyTraits<bool>())}};
OUTPUT_MAP(CumprodD) = {{0, OUTPUT_DESC(y)}};

// SoftmaxCrossEntropyWithLogits/
// SoftmaxCrossEntropyWithLogits
INPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(labels)}};
ATTR_MAP(SoftmaxCrossEntropyWithLogits) = EMPTY_ATTR_MAP;
OUTPUT_MAP(SoftmaxCrossEntropyWithLogits) = {{0, OUTPUT_DESC(loss)}, {1, OUTPUT_DESC(backprop)}};

@@ -306,7 +305,7 @@ INPUT_ATTR_MAP(MeanGrad) = {{2, ATTR_DESC(mean_grad_output_shape_value, kOpForma
ATTR_MAP(MeanGrad) = {{"mode", ATTR_DESC(mode, AnyTraits<int64_t>())}};

INPUT_MAP(SliceD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(begin, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())},
INPUT_ATTR_MAP(SliceD) = {{2, ATTR_DESC(offsets, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())},
{3, ATTR_DESC(size, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(SliceD) = EMPTY_ATTR_MAP;
OUTPUT_MAP(SliceD) = {{0, OUTPUT_DESC(y)}};

@@ -401,42 +400,10 @@ ATTR_MAP(BoundingBoxDecode) = {
};
OUTPUT_MAP(BoundingBoxDecode) = {{0, OUTPUT_DESC(bboxes)}};

#ifdef VALID_CODE

// Less
INPUT_MAP(Less) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
ATTR_MAP(Less) = EMPTY_ATTR_MAP;
OUTPUT_MAP(Less) = {{0, OUTPUT_DESC(z)}};

// Cast
INPUT_MAP(Cast) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(Cast) = {{2, ATTR_DESC(dst_type, AnyTraits<GEType>())}};
ATTR_MAP(Cast) = {{"Truncate", ATTR_DESC(truncate, AnyTraits<bool>())}};
OUTPUT_MAP(Cast) = {{0, OUTPUT_DESC(y)}};

// Minimum
INPUT_MAP(Minimum) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};
ATTR_MAP(Minimum) = {{"alpha", ATTR_DESC(alpha, AnyTraits<float>())}, {"beta", ATTR_DESC(beta, AnyTraits<float>())}};
OUTPUT_MAP(Minimum) = {{0, OUTPUT_DESC(z)}};

// Sub
INPUT_MAP(Sub) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
ATTR_MAP(Sub) = {{"alpha", ATTR_DESC(alpha, AnyTraits<float>())}, {"beta", ATTR_DESC(beta, AnyTraits<float>())}};

#endif

// TopKV2
INPUT_MAP(TopKV2) = {
{1, INPUT_DESC(input)},
{2, INPUT_DESC(k)},
};

ATTR_MAP(TopKV2) = {{"T", ATTR_DESC(T, AnyTraits<GEType>())}, {"sorted", ATTR_DESC(sorted, AnyTraits<bool>())}};

OUTPUT_MAP(TopKV2) = {
{0, OUTPUT_DESC(values)},
{1, OUTPUT_DESC(indices)},
};
// TopK
INPUT_MAP(TopK) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(k)}};
ATTR_MAP(TopK) = {{"sorted", ATTR_DESC(sorted, AnyTraits<bool>())}};
OUTPUT_MAP(TopK) = {{0, OUTPUT_DESC(values)}, {1, OUTPUT_DESC(indices)}};

// Multiply
INPUT_MAP(Multiply) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(y)}};

@@ -476,7 +443,7 @@ ATTR_MAP(Iou) = {{"mode", ATTR_DESC(mode, AnyTraits<std::string>())}};
OUTPUT_MAP(Iou) = {{0, OUTPUT_DESC(overlap)}};

// ResizeNearestNeighborD
INPUT_MAP(ResizeNearestNeighborD) = {{1, INPUT_DESC(images)}};
INPUT_MAP(ResizeNearestNeighborD) = {{1, INPUT_DESC(x)}};
ATTR_MAP(ResizeNearestNeighborD) = {
{"size", ATTR_DESC(size, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
{"align_corners", ATTR_DESC(align_corners, AnyTraits<bool>())}};

@@ -506,17 +473,17 @@ ATTR_MAP(Relu6) = EMPTY_ATTR_MAP;
OUTPUT_MAP(Relu6) = {{0, OUTPUT_DESC(activations)}};

// Relu6Grad
INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(dy)}, {2, INPUT_DESC(y)}};
INPUT_MAP(Relu6Grad) = {{1, INPUT_DESC(features)}, {2, INPUT_DESC(gradients)}};
ATTR_MAP(Relu6Grad) = EMPTY_ATTR_MAP;
OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(z)}};
OUTPUT_MAP(Relu6Grad) = {{0, OUTPUT_DESC(backprops)}};

// ResizeBilinearGrad
INPUT_MAP(ResizeBilinearGrad) = {{1, INPUT_DESC(grads)}, {2, INPUT_DESC(original_image)}};
ATTR_MAP(ResizeBilinearGrad) = {{"align_corners", ATTR_DESC(align_corners, AnyTraits<bool>())}};
OUTPUT_MAP(ResizeBilinearGrad) = {{0, OUTPUT_DESC(y)}};

// ResizeBilinear
INPUT_MAP(ResizeBilinearD) = {{1, INPUT_DESC(images)}};
// ResizeBilinearD
INPUT_MAP(ResizeBilinearD) = {{1, INPUT_DESC(x)}};
ATTR_MAP(ResizeBilinearD) = {
{"size", ATTR_DESC(size, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
{"align_corners", ATTR_DESC(align_corners, AnyTraits<bool>())}};

@@ -539,9 +506,9 @@ OUTPUT_MAP(NMSWithMask) = {
{0, OUTPUT_DESC(selected_boxes)}, {1, OUTPUT_DESC(selected_idx)}, {2, OUTPUT_DESC(selected_mask)}};

// Unpack
INPUT_MAP(Unpack) = {{1, INPUT_DESC(value)}};
INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}};
ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits<int>())}, {"num", ATTR_DESC(num, AnyTraits<int>())}};
DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(output)}};
DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}};

// ScatterNdUpdate
INPUT_MAP(ScatterNdUpdate) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(indices)}, {3, INPUT_DESC(updates)}};

@@ -574,8 +541,8 @@ INPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {
ATTR_MAP(SigmoidCrossEntropyWithLogitsGrad) = EMPTY_ATTR_MAP;
OUTPUT_MAP(SigmoidCrossEntropyWithLogitsGrad) = {{0, OUTPUT_DESC(gradient)}};

// ScatterNd
INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(updates)}};
// ScatterNdD
INPUT_MAP(ScatterNdD) = {{1, INPUT_DESC(indices)}, {2, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ScatterNdD) = {
{3, ATTR_DESC(shape, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ScatterNdD) = EMPTY_ATTR_MAP;

@@ -587,7 +554,7 @@ ATTR_MAP(PadD) = {{"paddings", ATTR_DESC(paddings, AnyTraits<std::vector<std::ve
OUTPUT_MAP(PadD) = {{0, OUTPUT_DESC(y)}};

// GatherNd
INPUT_MAP(GatherNd) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
INPUT_MAP(GatherNd) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}};
ATTR_MAP(GatherNd) = EMPTY_ATTR_MAP;
OUTPUT_MAP(GatherNd) = {{0, OUTPUT_DESC(y)}};

@@ -612,13 +579,13 @@ ATTR_MAP(ROIAlignGrad) = {
// ArgMaxD
INPUT_MAP(ArgMaxD) = {{1, INPUT_DESC(x)}};
ATTR_MAP(ArgMaxD) = {{"axis", ATTR_DESC(dimension, AnyTraits<int>())},
{"output_type", ATTR_DESC(output_type, AnyTraits<GEType>())}};
{"output_type", ATTR_DESC(dtype, AnyTraits<GEType>())}};
OUTPUT_MAP(ArgMaxD) = {{0, OUTPUT_DESC(y)}};

// ArgMinD
INPUT_MAP(ArgMinD) = {{1, INPUT_DESC(x)}};
ATTR_MAP(ArgMinD) = {{"axis", ATTR_DESC(dimension, AnyTraits<int>())},
{"output_type", ATTR_DESC(output_type, AnyTraits<GEType>())}};
{"output_type", ATTR_DESC(dtype, AnyTraits<GEType>())}};
OUTPUT_MAP(ArgMinD) = {{0, OUTPUT_DESC(y)}};

// ArgMaxWithValue

@@ -634,14 +601,14 @@ ATTR_MAP(ArgMinWithValue) = {{"axis", ATTR_DESC(dimension, AnyTraits<int>())},
OUTPUT_MAP(ArgMinWithValue) = {{0, OUTPUT_DESC(indice)}, {1, OUTPUT_DESC(values)}};

// ReduceAll
INPUT_MAP(ReduceAll) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}};
INPUT_MAP(ReduceAll) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axes)}};
ATTR_MAP(ReduceAll) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceAll) = {{0, OUTPUT_DESC(y)}};

// ReduceMeanD
INPUT_MAP(ReduceMeanD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ReduceMeanD) = {
{2, ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{2, ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ReduceMeanD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceMeanD) = {{0, OUTPUT_DESC(y)}};

@@ -708,11 +675,12 @@ INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}};
ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())}};
OUTPUT_MAP(BiasAddGrad) = {{0, OUTPUT_DESC(y)}};

// maxpoolgrad
// MaxPoolGrad
INPUT_MAP(MaxPoolGrad) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}, {3, INPUT_DESC(grad)}};
ATTR_MAP(MaxPoolGrad) = {{"ksize", ATTR_DESC(ksize, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())},
{"strides", ATTR_DESC(strides, AnyTraits<int>(), AnyTraits<std::vector<int64_t>>())},
{"padding", ATTR_DESC(padding, AnyTraits<std::string>())}};
{"padding", ATTR_DESC(padding, AnyTraits<std::string>())},
{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())}};
OUTPUT_MAP(MaxPoolGrad) = {{0, OUTPUT_DESC(y)}};

// avgpoolgrad

@@ -739,28 +707,34 @@ ATTR_MAP(Conv2D) = {
{"stride", ATTR_DESC(strides, "pad", AnyTraits<std::vector<int64_t>>())},
{"pad_list", ATTR_DESC(pads, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
{"dilation", ATTR_DESC(dilations, "pad", AnyTraits<std::vector<int64_t>>())},
{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())},
{"group", ATTR_DESC(groups, AnyTraits<int>())}
};
OUTPUT_MAP(Conv2D) = {{0, OUTPUT_DESC(y)}};

// Conv2DBackpropInputD
INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filters)}};
INPUT_MAP(Conv2DBackpropInputD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(filter)}};
INPUT_ATTR_MAP(Conv2DBackpropInputD) = {
{3, ATTR_DESC(input_sizes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{3, ATTR_DESC(input_size, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(Conv2DBackpropInputD) = {
{"pad_list", ATTR_DESC(pads, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
{"stride", ATTR_DESC(strides, "strides", AnyTraits<std::vector<int64_t>>())},
{"stride", ATTR_DESC(strides, "pad", AnyTraits<std::vector<int64_t>>())},
{"dilation", ATTR_DESC(dilations, "pad", AnyTraits<std::vector<int64_t>>())},
{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())},
{"group", ATTR_DESC(groups, AnyTraits<int>())}
};
OUTPUT_MAP(Conv2DBackpropInputD) = {{0, OUTPUT_DESC(y)}};

// Conv2DBackpropFilterD
INPUT_MAP(Conv2DBackpropFilterD) = {{1, INPUT_DESC(out_backprop)}, {2, INPUT_DESC(x)}};
INPUT_ATTR_MAP(Conv2DBackpropFilterD) = {
{3, ATTR_DESC(filter_sizes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{3, ATTR_DESC(filter_size, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(Conv2DBackpropFilterD) = {
{"pad_list", ATTR_DESC(pads, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())},
{"stride", ATTR_DESC(strides, "strides", AnyTraits<std::vector<int64_t>>())},
{"stride", ATTR_DESC(strides, "pad", AnyTraits<std::vector<int64_t>>())},
{"dilation", ATTR_DESC(dilations, "pad", AnyTraits<std::vector<int64_t>>())},
{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())},
{"group", ATTR_DESC(groups, AnyTraits<int>())}
};
OUTPUT_MAP(Conv2DBackpropFilterD) = {{0, OUTPUT_DESC(y)}};

@@ -798,8 +772,8 @@ OUTPUT_MAP(DepthwiseConv2DBackpropFilterD) = {{0, OUTPUT_DESC(filter_grad)}};

// MatMul
INPUT_MAP(MatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
ATTR_MAP(MatMul) = {{"transpose_a", ATTR_DESC(transpose_a, AnyTraits<bool>())},
{"transpose_b", ATTR_DESC(transpose_b, AnyTraits<bool>())}};
ATTR_MAP(MatMul) = {{"transpose_a", ATTR_DESC(transpose_x1, AnyTraits<bool>())},
{"transpose_b", ATTR_DESC(transpose_x2, AnyTraits<bool>())}};
OUTPUT_MAP(MatMul) = {{0, OUTPUT_DESC(y)}};

// Merge

@@ -846,10 +820,10 @@ ATTR_MAP(Sub) = EMPTY_ATTR_MAP;
OUTPUT_MAP(Sub) = {{0, OUTPUT_DESC(y)}};

// SplitD
INPUT_MAP(SplitD) = {{1, INPUT_DESC(value)}};
INPUT_MAP(SplitD) = {{1, INPUT_DESC(x)}};
ATTR_MAP(SplitD) = {{"axis", ATTR_DESC(split_dim, AnyTraits<int>())},
{"output_num", ATTR_DESC(num_split, AnyTraits<int>())}};
DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(output)}};
DYN_OUTPUT_MAP(SplitD) = {{0, DYN_OUTPUT_DESC(y)}};

// Neg
INPUT_MAP(Neg) = {{1, INPUT_DESC(x)}};

@@ -876,12 +850,12 @@ OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}};

// ConcatD
INPUT_MAP(ConcatD) = EMPTY_INPUT_MAP;
DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(input_values)}};
DYN_INPUT_MAP(ConcatD) = {{1, DYN_INPUT_DESC(x)}};
ATTR_MAP(ConcatD) = {
{"axis", ATTR_DESC(concat_dim, AnyTraits<int>())},
{"inputNums", ATTR_DESC(N, AnyTraits<int>())},
};
OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(output_data)}};
OUTPUT_MAP(ConcatD) = {{0, OUTPUT_DESC(y)}};

// Less
INPUT_MAP(Less) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};

@@ -916,14 +890,14 @@ OUTPUT_MAP(TanhGrad) = {{0, OUTPUT_DESC(z)}};
// ReduceMinD
INPUT_MAP(ReduceMinD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ReduceMinD) = {
{2, ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{2, ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ReduceMinD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceMinD) = {{0, OUTPUT_DESC(y)}};

// ReduceMaxD
INPUT_MAP(ReduceMaxD) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(ReduceMaxD) = {
{2, ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
{2, ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
ATTR_MAP(ReduceMaxD) = {{"keep_dims", ATTR_DESC(keep_dims, AnyTraits<bool>())}};
OUTPUT_MAP(ReduceMaxD) = {{0, OUTPUT_DESC(y)}};

@@ -1008,11 +982,11 @@ INPUT_MAP(LessEqual) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
ATTR_MAP(LessEqual) = EMPTY_ATTR_MAP;
OUTPUT_MAP(LessEqual) = {{0, OUTPUT_DESC(y)}};

// LogSoftmax
INPUT_MAP(LogSoftmax) = {{1, INPUT_DESC(logits)}};
ATTR_MAP(LogSoftmax) = {
{"axis", ATTR_DESC(axis, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
OUTPUT_MAP(LogSoftmax) = {{0, OUTPUT_DESC(logsoftmax)}};
// LogSoftmaxV2
INPUT_MAP(LogSoftmaxV2) = {{1, INPUT_DESC(logits)}};
ATTR_MAP(LogSoftmaxV2) = {
{"axis", ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>(), AnyTraits<std::vector<int64_t>>())}};
OUTPUT_MAP(LogSoftmaxV2) = {{0, OUTPUT_DESC(logsoftmax)}};

// RandomChoiceWithMask
INPUT_MAP(RandomChoiceWithMask) = {{1, INPUT_DESC(x)}};

@@ -1094,8 +1068,8 @@ OUTPUT_MAP(LayerNormGrad) = {{0, OUTPUT_DESC(pd_x)}, {1, OUTPUT_DESC(pd_gamma)},

// BatchMatMul
INPUT_MAP(BatchMatMul) = {{1, INPUT_DESC(x1)}, {2, INPUT_DESC(x2)}};
ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x, AnyTraits<bool>())},
{"transpose_x2", ATTR_DESC(adj_y, AnyTraits<bool>())}};
ATTR_MAP(BatchMatMul) = {{"transpose_x1", ATTR_DESC(adj_x1, AnyTraits<bool>())},
{"transpose_x2", ATTR_DESC(adj_x2, AnyTraits<bool>())}};
OUTPUT_MAP(BatchMatMul) = {{0, OUTPUT_DESC(y)}};

// DropoutDoMask

@@ -1146,6 +1120,19 @@ ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits<float>())},
{"use_locking", ATTR_DESC(use_locking, AnyTraits<bool>())}};
OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}};

// SparseApplyFtrlD
INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)},
{2, INPUT_DESC(accum)},
{3, INPUT_DESC(linear)},
{4, INPUT_DESC(grad)},
{5, INPUT_DESC(indices)}};
ATTR_MAP(SparseApplyFtrlD) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits<bool>())},
{"lr", ATTR_DESC(lr, AnyTraits<float>())},
{"l1", ATTR_DESC(l1, AnyTraits<float>())},
{"l2", ATTR_DESC(l2, AnyTraits<float>())},
{"lr_power", ATTR_DESC(lr_power, AnyTraits<float>())}};
OUTPUT_MAP(SparseApplyFtrlD) = {{0, OUTPUT_DESC(var)}};

// SpaceToDepth
INPUT_MAP(SpaceToDepth) = {{1, INPUT_DESC(x)}};
ATTR_MAP(SpaceToDepth) = {{"block_size", ATTR_DESC(block_size, AnyTraits<int64_t>())}};

@@ -209,8 +209,8 @@ DECLARE_OP_USE_OUTPUT(Merge)
DECLARE_OP_ADAPTER(Switch)
DECLARE_OP_USE_OUTPUT(Switch)

DECLARE_OP_ADAPTER(TopKV2)
DECLARE_OP_USE_OUTPUT(TopKV2)
DECLARE_OP_ADAPTER(TopK)
DECLARE_OP_USE_OUTPUT(TopK)

DECLARE_OP_ADAPTER(RealDiv)
DECLARE_OP_USE_OUTPUT(RealDiv)

@@ -260,8 +260,8 @@ DECLARE_OP_ADAPTER(Select)
DECLARE_OP_USE_OUTPUT(Select)
DECLARE_OP_ADAPTER(LessEqual)
DECLARE_OP_USE_OUTPUT(LessEqual)
DECLARE_OP_ADAPTER(LogSoftmax)
DECLARE_OP_USE_OUTPUT(LogSoftmax)
DECLARE_OP_ADAPTER(LogSoftmaxV2)
DECLARE_OP_USE_OUTPUT(LogSoftmaxV2)
DECLARE_OP_ADAPTER(TruncatedNormal)
DECLARE_OP_USE_OUTPUT(TruncatedNormal)
DECLARE_OP_ADAPTER(StridedSliceGrad)

@@ -391,8 +391,8 @@ DECLARE_OP_ADAPTER(Sigmoid)
DECLARE_OP_USE_OUTPUT(Sigmoid)
DECLARE_OP_ADAPTER(SigmoidGrad)
DECLARE_OP_USE_OUTPUT(SigmoidGrad)
DECLARE_OP_ADAPTER(Softmax)
DECLARE_OP_USE_OUTPUT(Softmax)
DECLARE_OP_ADAPTER(SoftmaxV2)
DECLARE_OP_USE_OUTPUT(SoftmaxV2)
DECLARE_OP_ADAPTER(SoftmaxGrad)
DECLARE_OP_USE_OUTPUT(SoftmaxGrad)
DECLARE_OP_ADAPTER(Greater)

@@ -435,6 +435,8 @@ DECLARE_OP_ADAPTER(Round)
DECLARE_OP_USE_OUTPUT(Round)
DECLARE_OP_ADAPTER(ApplyFtrl)
DECLARE_OP_USE_OUTPUT(ApplyFtrl)
DECLARE_OP_ADAPTER(SparseApplyFtrlD)
DECLARE_OP_USE_OUTPUT(SparseApplyFtrlD)
#ifdef ENABLE_GE
DECLARE_OP_ADAPTER(Print)
DECLARE_OP_USE_DYN_INPUT(Print)

@@ -361,12 +361,11 @@ MeTensorPtr TransformUtil::GenerateMeTensor(const GeTensorPtr& ge_tensor, const
MS_LOG(ERROR) << "GE tensor data size is zero!";
return nullptr;
}
errno_t ret = memcpy_s(me_data_ptr, me_data_size, ge_tensor->GetData(), ge_tensor->GetSize());
if (ret != EOK) {
MS_LOG(INFO) << "GE tensor data size is " << ge_tensor->GetSize() << " bytes";
MS_LOG(ERROR) << "Copy GE tensor data to me tensor failed";
return nullptr;
}

// Use memcpy here, not memcpy_s, just because the size of ge_tensor may be bigger than 2GB
// which is the size limit of memcpy_s
memcpy(me_data_ptr, ge_tensor->GetData(), ge_tensor->GetSize());

return make_shared<MeTensor>(me_tensor);
}

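The hunk above swaps memcpy_s for plain memcpy because, per its comment, the secure function rejects single copies larger than roughly 2 GB. If a bounds-checked copy were still wanted, one alternative is to loop over chunks that stay under that limit. The sketch below only illustrates that alternative; the header name and chunk size are assumptions, and this is not what the commit does:

#include <securec.h>  // assumed header providing memcpy_s and EOK
#include <cstddef>

// Copy `size` bytes in chunks small enough for memcpy_s; returns true on success.
// The 1 GB chunk size is an arbitrary choice kept below the secure-function limit.
bool ChunkedCopy(void *dst, size_t dst_size, const void *src, size_t size) {
  const size_t kChunk = 1UL << 30;
  if (dst_size < size) return false;
  auto *d = static_cast<char *>(dst);
  const auto *s = static_cast<const char *>(src);
  for (size_t off = 0; off < size; off += kChunk) {
    size_t n = (size - off < kChunk) ? (size - off) : kChunk;
    // destMax is capped to the chunk size so it also stays under the limit.
    if (memcpy_s(d + off, n, s + off, n) != EOK) return false;
  }
  return true;
}
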
@@ -355,7 +355,9 @@ void MsContext::GetGeOptions(std::map<std::string, std::string>* ge_options) con
MS_LOG(ERROR) << "Set proto lib path failed!";
}

// Disbale the global variable acc, only enable it whlie adding training graph in pipeline
// Enable auto mixed precision according to the context options
(*ge_options)["ge.exec.auto_mix_precision"] = std::to_string(auto_mixed_precision_flag_);
// Disable the global variable acc, only enable it while adding training graph in pipeline
(*ge_options)["ge.exec.variable_acc"] = "0";
#endif
}

@@ -65,7 +65,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
SmoothL1Loss, Softmax,
SoftmaxCrossEntropyWithLogits, ROIAlign,
SparseSoftmaxCrossEntropyWithLogits, Tanh,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl)
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrlD)
from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey

@@ -217,6 +217,7 @@ __all__ = [
"Abs",
"BinaryCrossEntropy",
"SparseApplyAdagrad",
"SparseApplyFtrlD",
"SpaceToDepth",
"DepthToSpace",
"Conv2DBackpropInput",

@@ -2141,6 +2141,79 @@ class SparseApplyAdagrad(PrimitiveWithInfer):
return var_type


class SparseApplyFtrlD(PrimitiveWithInfer):
r"""
Updates the relevant entries of the parameters according to the FTRL optimization algorithm.

.. math ::
\text{accum} = \text{grad} * \text{grad}

.. math ::
\text{linear} += \text{grad} + (\text{accum}^{-\text{lr\_power}} -
\frac{\text{accum}^{-\text{lr\_power}}}{\text{lr}} * \text{var})

.. math ::
\text{quadratic} = \frac{1.0}{\text{accum}^{\text{lr\_power}} * \text{lr}} + 2 * \text{l2}

.. math ::
\text{var} = \frac{\text{sign}(\text{linear}) * \text{l1} - \text{linear}}{\text{quadratic}}
\ \text{if} \ \vert \text{linear} \vert > \text{l1} \ \text{else} \ 0.0

Args:
lr (float): Learning rate.
l1 (float): L1 regularization strength.
l2 (float): L2 regularization strength.
lr_power (float): Learning rate power, the exponent applied to the accumulator; usually non-positive.
use_locking (bool): If true, updating the var and accum tensors will be protected. Default: False.

Inputs:
- **var** (Tensor) - Variable to be updated. The type must be float32.
- **accum** (Tensor) - Accumulator to be updated. The shape must be the same as `var`'s shape,
the type must be float32.
- **linear** (Tensor) - Linear coefficient to be updated. The shape must be the same as `var`'s shape,
the type must be float32.
- **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape,
the type must be float32.
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
The shape of `indices` must match the first dimension of `grad`; the type must be int32.

Outputs:
Tensor, with the same shape and type as `var`.

"""

@prim_attr_register
def __init__(self, lr, l1, l2, lr_power, use_locking=False):
"""init SparseApplyFtrlD"""
self.lr = validator.check_type("lr", lr, [float])
self.l1 = validator.check_type("l1", l1, [float])
self.l2 = validator.check_type("l2", l2, [float])
self.lr_power = validator.check_type("lr_power", lr_power, [float])
self.use_locking = validator.check_type("use_locking", use_locking, [bool])

def infer_shape(self, var_shape, accum_shape, linear_shape, grad_shape, indices_shape):
validator.check_param_equal('var shape', var_shape, 'accum shape', accum_shape)
validator.check_param_equal('len of var shape', len(var_shape), 'len of grad shape', len(grad_shape))
validator.check_param_equal('len of var shape', len(var_shape), 'len of linear shape', len(linear_shape))
if len(var_shape) > 1:
validator.check_param_equal('var_shape', var_shape[1:], 'grad_shape', grad_shape[1:])
validator.check_param_equal('var_shape', var_shape[1:], 'linear_shape', linear_shape[1:])
validator.check_integer("len of indices shape", len(indices_shape), 1, Rel.EQ)
validator.check('the first dimension of grad', grad_shape[0],
'the shape of indices', indices_shape[0], Rel.EQ)

return var_shape

def infer_dtype(self, var_type, accum_type, linear_type, grad_type, indices_type):
validator.check_subclass("var_type", var_type, mstype.tensor)
validator.check_subclass("accum_type", accum_type, mstype.tensor)
validator.check_subclass("linear_type", linear_type, mstype.tensor)
validator.check_subclass("grad_type", grad_type, mstype.tensor)
validator.check_subclass("indices_type", indices_type, mstype.tensor)

return var_type


class LARSUpdate(PrimitiveWithInfer):
"""
Conduct lars (layer-wise adaptive rate scaling) update on the square sum of gradient.

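As a companion to the docstring above, here is a scalar sketch of an FTRL-proximal style update. Where the docstring is ambiguous, the canonical FTRL form is assumed (the accumulator adds the squared gradient, and the linear/quadratic terms follow the usual FTRL-proximal expressions), so treat it as illustrative rather than the exact device kernel behind SparseApplyFtrlD:

#include <cmath>
#include <cstdio>

// One-element FTRL-proximal style update; all state is modified in place.
void FtrlStep(float &var, float &accum, float &linear, float grad,
              float lr, float l1, float l2, float lr_power) {
  float accum_new = accum + grad * grad;
  float sigma = (std::pow(accum_new, -lr_power) - std::pow(accum, -lr_power)) / lr;
  linear += grad - sigma * var;
  float quadratic = std::pow(accum_new, -lr_power) / lr + 2.0f * l2;
  // Shrink toward zero; entries whose |linear| does not exceed l1 are set to zero.
  var = (std::fabs(linear) > l1) ? (std::copysign(l1, linear) - linear) / quadratic : 0.0f;
  accum = accum_new;
}

int main() {
  float var = 1.0f, accum = 0.1f, linear = 0.0f;
  FtrlStep(var, accum, linear, /*grad=*/0.5f, /*lr=*/0.1f, /*l1=*/0.1f, /*l2=*/0.1f,
           /*lr_power=*/-0.5f);
  std::printf("var=%f accum=%f linear=%f\n", var, accum, linear);
  return 0;
}
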
@ -2244,4 +2317,4 @@ class ApplyFtrl(PrimitiveWithInfer):
|
|||
validator.check_typename("l1", l1_type,[mstype.float16, mstype.float32])
|
||||
validator.check_typename("l2", l2_type,[mstype.float16, mstype.float32])
|
||||
validator.check_typename("lr_power", lr_power_type,[mstype.float16, mstype.float32])
|
||||
return var_type
|
||||
return var_type
|
||||
|
|
|
@ -749,6 +749,11 @@ test_case_nn_ops = [
|
|||
'desc_inputs': [[3, 3], [3, 3], [3, 3], Tensor(np.ones((3,), np.int32))],
|
||||
'desc_bprop': [3, 3],
|
||||
'skip': ['backward']}),
|
||||
('SparseApplyFtrlD', {
|
||||
'block': P.SparseApplyFtrlD(0.1, 0.1, 0.1, -0.1),
|
||||
'desc_inputs': [[3, 3], [3, 3], [3, 3], [3, 3], Tensor(2*np.ones((3,), np.int32))],
|
||||
'desc_bprop': [3, 3],
|
||||
'skip': ['backward']}),
|
||||
('Flatten_1', {
|
||||
'block': NetForFlatten(),
|
||||
'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],
|
||||
|