diff --git a/mindspore/lite/include/train_session.h b/mindspore/lite/include/train_session.h
index 016cd41e842..f6012ab8527 100644
--- a/mindspore/lite/include/train_session.h
+++ b/mindspore/lite/include/train_session.h
@@ -27,7 +27,6 @@ struct Model;
 }
 namespace session {
-
 class TrainSession : public lite::LiteSession {
  public:
   TrainSession();
diff --git a/mindspore/lite/nnacl/fp32/batchnorm.c b/mindspore/lite/nnacl/fp32/batchnorm.c
index 49926d1c4a1..2ed427140eb 100644
--- a/mindspore/lite/nnacl/fp32/batchnorm.c
+++ b/mindspore/lite/nnacl/fp32/batchnorm.c
@@ -20,8 +20,8 @@
 #include "nnacl/op_base.h"
 #include "nnacl/errorcode.h"
 
-void BatchNormFp32(const void *input, const void *mean, const void *variance,
-                   BatchNormParameter *param, int task_id, void *output) {
+void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
+                   void *output) {
   int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
   int completed_units = task_id * units_per_thread;
   int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
@@ -31,7 +31,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance,
     for (int c = 0; c < param->channel_; c++) {
       float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_);
       ((float *)output)[cur_offset + c] =
-        (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
+          (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
     }
     cur_offset += param->channel_;
   }
@@ -53,3 +53,22 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset
     cur_offset += param->channel_;
   }
 }
+
+void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var,
+                               BatchNormParameter *param, float *save_mean, float *save_inv_var) {
+  float N = param->channel_ * param->unit_;
+  for (int i = 0; i < param->unit_; i++) {
+    for (int f = 0; f < param->channel_; f++) {
+      int idx = i * param->channel_ + f;
+      run_mean[f] += input[idx];
+      run_var[f] += input[idx] * input[idx];
+    }
+  }
+  for (int f = 0; f < param->channel_; f++) {
+    run_mean[f] = run_mean[f] / N;
+    run_var[f] = run_var[f] / N - run_mean[f] * run_mean[f];
+    save_mean[f] = momentum * save_mean[f] + (1 - momentum) * run_mean[f];
+    float inv_var = 1.f/sqrt(run_var[f]+param->epsilon_);
+    save_inv_var[f] = momentum * save_inv_var[f] + (1 - momentum) * inv_var;
+  }
+}
diff --git a/mindspore/lite/nnacl/fp32/batchnorm.h b/mindspore/lite/nnacl/fp32/batchnorm.h
index 84b675c8db0..6defc90b7ee 100644
--- a/mindspore/lite/nnacl/fp32/batchnorm.h
+++ b/mindspore/lite/nnacl/fp32/batchnorm.h
@@ -28,6 +28,8 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba
 void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
                         const void *variance, BatchNormParameter *param, int task_id, void *output);
+void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var,
+                               BatchNormParameter *param, float *save_mean, float *save_var);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/fp32_grad/batch_norm.c b/mindspore/lite/nnacl/fp32_grad/batch_norm.c
index 76cb832b665..5f511e15c70 100644
--- a/mindspore/lite/nnacl/fp32_grad/batch_norm.c
+++ b/mindspore/lite/nnacl/fp32_grad/batch_norm.c
@@ -27,20 +27,40 @@ void sumSpatialBatch(const float *in, int size, int ch, float *out) {
   }
 }
 
-void scaleBias(const float *scales, int batch, int n, int size, float *output) {
-  for (int i = 0; i < batch * size; i++)
-    for (int c = 0; c < n; c++) output[i * n + c] *= scales[c];
+static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) {
+  float N = (float)size;
+  sumSpatialBatch(in, N, ch, mean);
+  for (int f = 0; f < ch; ++f) {
+    mean[f] /= N;
+  }
+  for (int f=0; f< ch; f++) {
+    float tvar = 0;
+    for (int i =0; i< N; i++) {
+      float x = in[i*ch +f];
+      tvar += (x-mean[f]) *(x-mean[f]);
+    }
+    invar[f] = 1.0f/(sqrt(tvar/N+eps));
+  }
 }
 
-void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial,
-               float *out) {
-  int b, f, i;
-  for (b = 0; b < batch; ++b) {
-    for (i = 0; i < spatial; ++i) {
-      for (f = 0; f < filters; ++f) {
-        int index = b * filters * spatial + i * filters + f;
-        out[index] = (x[index] - mean[f]) * invar[f];
-      }
+void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps,
+               float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) {
+  meanVar(in, size, channels, eps, mean, invar);
+  for (int i = 0; i < size; i++) {
+    for (int f = 0; f < channels; f++) {
+      int ix = i*channels + f;
+      float x_hat = (in[ix] - mean[f]) * invar[f];
+      float dxhat = dout[ix] * scale[f];
+      dxhat_sum[f] += dxhat;
+      dxhathat_sum[f] += dxhat * x_hat;
+    }
+  }
+  for (int i = 0; i < size; i++) {
+    for (int f = 0; f < channels; f++) {
+      int ix = i*channels + f;
+      float x_hat = (in[ix] - mean[f]) * invar[f];
+      float dxhat = dout[ix] * scale[f];
+      out[ix] = 1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]);
     }
   }
 }
@@ -60,65 +80,3 @@ void backwardScale(const float *x, const float *mean, const float *invar, const
   }
 }
 
-void meanVar(const float *in, int batch, int spatial, int ch, float eps, float *mean, float *invar) {
-  float N = batch * spatial;
-  sumSpatialBatch(in, N, ch, mean);
-  for (int f = 0; f < ch; ++f) {
-    mean[f] /= N;
-  }
-  for (int f=0; f< ch; f++) {
-    float tvar = 0;
-    for (int i =0; i< N; i++) {
-      float x = in[i*ch +f];
-      tvar += (x-mean[f]) *(x-mean[f]);
-    }
-    invar[f] = 1.0f/(sqrt(tvar/N+eps));
-  }
-}
-
-void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta) {
-  sumSpatialBatch(yt, size, ch, mean_delta);
-  for (int i = 0; i < ch; i++) mean_delta[i] *= -invar[i];
-}
-
-void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
-             float *mean_add, float *mean_delta) {
-  int i, k;
-  memset(mean_add, 0, filters * sizeof(float));
-  for (k = 0; k < spatial * batch; ++k) {
-    for (i = 0; i < filters; ++i) {
-      int index = k * filters + i;
-      mean_add[i] += x[index] - mean[i];
-    }
-  }
-  for (i = 0; i < filters; ++i) {
-    mean_add[i] *= variance_delta[i] * (-2.f / (spatial * batch));
-    mean_delta[i] += mean_add[i];
-  }
-}
-
-void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int filters,
-                   int spatial, float *variance_delta) {
-  int i, k;
-  memset(variance_delta, 0, filters * sizeof(float));
-  for (k = 0; k < batch * spatial; k++) {
-    for (i = 0; i < filters; i++) {
-      int index = k * filters + i;
-      variance_delta[i] += delta[index] * (x[index] - mean[i]);
-    }
-  }
-  for (i = 0; i < filters; i++) variance_delta[i] *= -.5 * 1.0f/(invar[i]*invar[i]*invar[i]);
-}
-
-void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta,
-                    const float *variance_delta, int batch, int filters, int spatial, float *delta) {
-
int f, k; - for (k = 0; k < batch * spatial; k++) { - for (f = 0; f < filters; f++) { - int index = k * filters + f; - delta[index] = delta[index] * invar[f] + - variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + - mean_delta[f] / (spatial * batch); - } - } -} diff --git a/mindspore/lite/nnacl/fp32_grad/batch_norm.h b/mindspore/lite/nnacl/fp32_grad/batch_norm.h index f66e5508e05..fdde809ba42 100644 --- a/mindspore/lite/nnacl/fp32_grad/batch_norm.h +++ b/mindspore/lite/nnacl/fp32_grad/batch_norm.h @@ -30,18 +30,11 @@ extern "C" { #endif void sumSpatialBatch(const float *in, int size, int ch, float *out); -void scaleBias(const float *scales, int batch, int n, int size, float *output); -void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial, float *out); -void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, - int size, float *scale_updates); -void meanVar(const float *in, int batch, int size, int ch, float eps, float *mean, float *invar); -void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta); -void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int ch, - int spatial, float *variance_delta); -void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial, - float *mean_add, float *mean_delta); -void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta, - const float *variance_delta, int batch, int filters, int spatial, float *delta); +void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, + float *mean, float *invar, float *xhat_sum, float *dxhat_sum, float *out); +void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, + int n, int size, float *scale_updates); + #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32_grad/softmax_grad.c b/mindspore/lite/nnacl/fp32_grad/softmax_grad.c new file mode 100644 index 00000000000..8e21a029091 --- /dev/null +++ b/mindspore/lite/nnacl/fp32_grad/softmax_grad.c @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnacl/fp32_grad/softmax_grad.h" +#include +#include "nnacl/fp32_grad/gemm.h" + +void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul, + SoftmaxParameter *parameter) { + int32_t axis = parameter->axis_; + int n_dim = parameter->n_dim_; + int ele_size = parameter->element_size_; + int *input_shape = parameter->input_shape_; + int dim = 1; + + int inner_size = 1, outter_size = 1; + for (int i = 0; i < axis; i++) { + outter_size *= input_shape[i]; + } + for (int i = axis + 1; i < n_dim; i++) { + inner_size *= input_shape[i]; + } + + for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0; + for (int i = 0; i < n_dim; i++) dim *= input_shape[i]; + dim /= outter_size; + memcpy(output_ptr, yt_ptr, ele_size * sizeof(float)); + + int M = input_shape[axis]; + int N = inner_size; + int K = 1; + for (int i = 0; i < outter_size; i++) { + int outter_offset = i * dim; + memset(sum_data, 0.0f, inner_size * sizeof(float)); + for (int k = 0; k < inner_size; k++) { + int inner_offset = outter_offset + k; + for (int j = 0; j < input_shape[axis]; j++) { + int offset = inner_offset + j * inner_size; + sum_data[k] += output_ptr[offset] * input_ptr[offset]; + } + } + gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N); + } + + for (int i = 0; i < ele_size; i++) { + output_ptr[i] *= input_ptr[i]; + } +} diff --git a/mindspore/lite/nnacl/fp32_grad/softmax_grad.h b/mindspore/lite/nnacl/fp32_grad/softmax_grad.h index 907810982d2..ad6f4fc8b20 100644 --- a/mindspore/lite/nnacl/fp32_grad/softmax_grad.h +++ b/mindspore/lite/nnacl/fp32_grad/softmax_grad.h @@ -14,10 +14,15 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ -#define MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ +#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ +#define MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ #include "nnacl/op_base.h" +#include "nnacl/fp32/softmax.h" + +#ifdef __cplusplus +extern "C" { +#endif typedef struct SoftmaxCrossEntropyParameter { OpParameter op_parameter_; @@ -26,4 +31,11 @@ typedef struct SoftmaxCrossEntropyParameter { int n_dim_; int input_shape_[5]; } SoftmaxCrossEntropyParameter; -#endif // MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_ + +void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, + float *sum_mul, SoftmaxParameter *parameter); +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_ diff --git a/mindspore/lite/src/common/file_utils_ext.cc b/mindspore/lite/src/common/file_utils_ext.cc index 91851f8b907..39e9983ced1 100644 --- a/mindspore/lite/src/common/file_utils_ext.cc +++ b/mindspore/lite/src/common/file_utils_ext.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { -static int CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { +static float CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) { float error = 0; // relative error @@ -35,6 +35,16 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in diffSum += diff; } error = diffSum / sum; + return error; +} + +int CompareRelativeOutput(float *output_data, std::string file_path) { + size_t output_size; + auto ground_truth = reinterpret_cast(mindspore::lite::ReadFile(file_path.c_str(), &output_size)); + size_t output_num = output_size / sizeof(float); + // std::cout << "output num : " << output_num << "\n"; + int error = 
CompareOutputRelativeData(output_data, ground_truth, output_num); + delete [] ground_truth; if (error > 1e-4) { std::cout << "has accuracy error!\n" << error << "\n"; return 1; @@ -42,14 +52,15 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in return 0; } -int CompareRelativeOutput(float *output_data, std::string file_path) { +float RelativeOutputError(float *output_data, std::string file_path) { size_t output_size; auto ground_truth = reinterpret_cast(mindspore::lite::ReadFile(file_path.c_str(), &output_size)); size_t output_num = output_size / sizeof(float); std::cout << "output num : " << output_num << "\n"; - int res = CompareOutputRelativeData(output_data, ground_truth, output_num); - delete[] ground_truth; - return res; + float error = CompareOutputRelativeData(output_data, ground_truth, output_num); + delete [] ground_truth; + return error; } } // namespace lite } // namespace mindspore + diff --git a/mindspore/lite/src/common/file_utils_ext.h b/mindspore/lite/src/common/file_utils_ext.h index 6f4cb0a7eab..086e6c23630 100644 --- a/mindspore/lite/src/common/file_utils_ext.h +++ b/mindspore/lite/src/common/file_utils_ext.h @@ -21,6 +21,7 @@ namespace mindspore { namespace lite { int CompareRelativeOutput(float *output_data, std::string file_path); +float RelativeOutputError(float *output_data, std::string file_path); } } // namespace mindspore #endif // MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_ diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 569f2680920..e8279cc505a 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -32,13 +32,16 @@ namespace mindspore { namespace lite { - static std::vector packed_op = { schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_MatMul}; // this method will not check whether tensor_idx is a weight tensor index, caller should ensure this. 
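+// In training builds (SUPPORT_TRAIN) weight tensors are never copied: they stay shared
+// with the model, whose buffers are also kept alive after CompileGraph (see the
+// SUPPORT_TRAIN guard around model->Free() below), so weights remain updatable in place.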
static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor_idx) { +#ifdef SUPPORT_TRAIN + return false; +#endif + MS_ASSERT(model != nullptr); auto post_node_idxes = GetLinkedPostNodeIdx(model, tensor_idx); return std::none_of(post_node_idxes.begin(), post_node_idxes.end(), [&](const size_t &post_node_idx) { @@ -267,7 +270,9 @@ int LiteSession::CompileGraph(Model *model) { } executor->Prepare(this->kernels_); +#ifndef SUPPORT_TRAIN model->Free(); +#endif return RET_OK; } diff --git a/mindspore/lite/src/model.cc b/mindspore/lite/src/model.cc index 93316260711..3282764182d 100644 --- a/mindspore/lite/src/model.cc +++ b/mindspore/lite/src/model.cc @@ -42,9 +42,11 @@ bool ConvertNodes(const schema::MetaGraph *meta_graph, Model *model) { for (uint32_t j = 0; j < count; ++j) { node->input_indices_.push_back(size_t(c_node->inputIndex()->GetAs(j))); } - count = c_node->outputIndex()->size(); - for (uint32_t j = 0; j < count; ++j) { - node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs(j))); + if (c_node->outputIndex() != nullptr) { + count = c_node->outputIndex()->size(); + for (uint32_t j = 0; j < count; ++j) { + node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs(j))); + } } model->nodes_.push_back(node); } diff --git a/mindspore/lite/src/ops/activation_grad.cc b/mindspore/lite/src/ops/activation_grad.cc index a3cb09c40a7..2b5bc176804 100644 --- a/mindspore/lite/src/ops/activation_grad.cc +++ b/mindspore/lite/src/ops/activation_grad.cc @@ -46,6 +46,8 @@ int ActivationGrad::UnPackAttr(const Primitive &prim, const std::vectortype = schema::ActivationType_RELU6; } + // auto alpha = GetValue(prim.GetAttr("alpha")); + attr->alpha = 0; // alpha; this->primitive_->value.value = attr.release(); if (this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "new primitiveT value failed"; diff --git a/mindspore/lite/src/ops/apply_momentum.cc b/mindspore/lite/src/ops/apply_momentum.cc index d46b1de2093..fa86ad02421 100644 --- a/mindspore/lite/src/ops/apply_momentum.cc +++ b/mindspore/lite/src/ops/apply_momentum.cc @@ -16,7 +16,6 @@ #include "src/ops/apply_momentum.h" namespace mindspore { namespace lite { - #ifdef PRIMITIVE_WRITEABLE int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { @@ -31,11 +30,17 @@ int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vectorprimitive_->value.type; return RET_ERROR; } - auto attr = std::make_unique(); - this->primitive_->value.value = attr.release(); if (this->primitive_->value.value == nullptr) { - MS_LOG(ERROR) << "new primitiveT value failed"; - return RET_ERROR; + auto attr = std::make_unique(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + this->primitive_->value.value = attr.release(); + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } } return RET_OK; } @@ -49,13 +54,13 @@ int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatb return RET_ERROR; } auto val_offset = schema::CreateApplyMomentum(*fbb); - auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ActivationGrad, val_offset.o); + auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ApplyMomentum, val_offset.o); fbb->Finish(prim_offset); return RET_OK; } #endif -int ApplyMomentum::InferShape(std::vector inputs, std::vector outputs) { +int ApplyMomentum::InferShape(std::vector inputs, 
std::vector outputs) { if (5 != inputs.size()) { MS_LOG(ERROR) << "ApplyMomentum should have at 5 input tensors"; return RET_ERROR; diff --git a/mindspore/lite/src/ops/arithmetic_grad.cc b/mindspore/lite/src/ops/arithmetic_grad.cc index d5d234dc91b..490269e9a61 100644 --- a/mindspore/lite/src/ops/arithmetic_grad.cc +++ b/mindspore/lite/src/ops/arithmetic_grad.cc @@ -48,6 +48,9 @@ int ArithmeticGrad::InferShape(std::vector inputs_, std::vector< if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) { ndim_ = outShape.size(); + x1_shape_.resize(ndim_); + x2_shape_.resize(ndim_); + dy_shape_.resize(ndim_); auto fillDimNum0 = outShape.size() - inShape0.size(); auto fillDimNum1 = outShape.size() - inShape1.size(); int j0 = 0; @@ -61,6 +64,9 @@ int ArithmeticGrad::InferShape(std::vector inputs_, std::vector< // if (inShape0.size() < inShape1.size()) if (dx1->ElementsNum() < dx2->ElementsNum()) { ndim_ = inShape1.size(); + x1_shape_.resize(ndim_); + x2_shape_.resize(ndim_); + dy_shape_.resize(ndim_); auto fillDimNum = inShape1.size() - inShape0.size(); // This will not work for batch! int j = 0; for (unsigned int i = 0; i < inShape1.size(); i++) { @@ -74,8 +80,10 @@ int ArithmeticGrad::InferShape(std::vector inputs_, std::vector< } } else if (dx2->ElementsNum() < dx1->ElementsNum()) { // if (inShape0.size() > inShape1.size()) ndim_ = inShape0.size(); + x1_shape_.resize(ndim_); + x2_shape_.resize(ndim_); + dy_shape_.resize(ndim_); broadcasting_ = true; - ndim_ = inShape0.size(); int j = 0; auto fillDimNum = inShape0.size() - inShape1.size(); for (unsigned int i = 0; i < inShape0.size(); i++) { diff --git a/mindspore/lite/src/ops/arithmetic_grad.h b/mindspore/lite/src/ops/arithmetic_grad.h index f05ced2936c..d01f8620d79 100644 --- a/mindspore/lite/src/ops/arithmetic_grad.h +++ b/mindspore/lite/src/ops/arithmetic_grad.h @@ -32,7 +32,7 @@ class ArithmeticGrad : public PrimitiveC { ArithmeticGrad() = default; explicit ArithmeticGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} #else - // explicit Arithmetic(schema::Primitive *primitive) : PrimitiveC(primitive) {} +// explicit ArithmeticGrad(const schema::Primitive &primitive) : PrimitiveC(primitive) {} ArithmeticGrad() = default; int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override { return RET_ERROR; diff --git a/mindspore/lite/src/ops/bias_grad.cc b/mindspore/lite/src/ops/bias_grad.cc index d54a9c95294..cee3f9cb272 100644 --- a/mindspore/lite/src/ops/bias_grad.cc +++ b/mindspore/lite/src/ops/bias_grad.cc @@ -41,6 +41,7 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector &i MS_LOG(ERROR) << "new primitiveT value failed"; return RET_ERROR; } + attr->axis = {0}; // GetValue>(prim.GetAttr("axis")); this->primitive_->value.value = attr; if (this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "primitive value is nullptr"; @@ -73,6 +74,7 @@ std::vector BiasGrad::GetAxis() const { auto fb_vector = this->primitive_->value_as_BiasGrad()->axis(); return std::vector(fb_vector->begin(), fb_vector->end()); } +#endif int BiasGrad::InferShape(std::vector inputs, std::vector outputs) { if (1 != inputs.size()) { @@ -99,6 +101,5 @@ int BiasGrad::InferShape(std::vector inputs, std::vector out return RET_OK; } -#endif } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/bias_grad.h b/mindspore/lite/src/ops/bias_grad.h index 63291793463..cfcb25fef2c 100644 --- a/mindspore/lite/src/ops/bias_grad.h +++ 
b/mindspore/lite/src/ops/bias_grad.h @@ -38,8 +38,8 @@ class BiasGrad : public PrimitiveC { BiasGrad() = default; int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; - int InferShape(std::vector inputs, std::vector outputs) override; #endif + int InferShape(std::vector inputs, std::vector outputs) override; std::vector GetAxis() const; }; } // namespace lite diff --git a/mindspore/lite/src/ops/bn_grad.cc b/mindspore/lite/src/ops/bn_grad.cc index 18015c523cf..d36875cc35c 100644 --- a/mindspore/lite/src/ops/bn_grad.cc +++ b/mindspore/lite/src/ops/bn_grad.cc @@ -67,9 +67,31 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers: fbb->Finish(prim_offset); return RET_OK; } + float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); } float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); } - #endif +int BNGrad::InferShape(std::vector inputs, std::vector outputs) { + if (5 != inputs.size()) { + MS_LOG(ERROR) << "BNGrad should have five inputs"; + return RET_ERROR; + } + if (3 != outputs.size()) { + MS_LOG(ERROR) << "BNGrad should have three outputs"; + return RET_ERROR; + } + auto in = inputs[1]; + auto scale = inputs[2]; + outputs[0]->set_shape(in->shape()); + outputs[1]->set_shape(scale->shape()); + outputs[2]->set_shape(scale->shape()); + outputs[0]->set_data_type(in->data_type()); + outputs[1]->set_data_type(scale->data_type()); + outputs[2]->set_data_type(scale->data_type()); + outputs[0]->SetFormat(in->GetFormat()); + outputs[1]->SetFormat(scale->GetFormat()); + outputs[2]->SetFormat(scale->GetFormat()); + return RET_OK; +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/bn_grad.h b/mindspore/lite/src/ops/bn_grad.h index fd9baf618ea..a4441f2351f 100644 --- a/mindspore/lite/src/ops/bn_grad.h +++ b/mindspore/lite/src/ops/bn_grad.h @@ -38,6 +38,8 @@ class BNGrad : public PrimitiveC { BNGrad() = default; int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; #endif + int InferShape(std::vector inputs_, + std::vector outputs_) override; float GetEps() const; float GetMomentum() const; }; diff --git a/mindspore/lite/src/ops/bn_grad_input.cc b/mindspore/lite/src/ops/bn_grad_input.cc deleted file mode 100644 index d243764979f..00000000000 --- a/mindspore/lite/src/ops/bn_grad_input.cc +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "src/ops/bn_grad_input.h" - -namespace mindspore { -namespace lite { -#ifdef PRIMITIVE_WRITEABLE -float BNGradInput::GetEps() const { return this->primitive_->value.AsBNGradInput()->eps; } -float BNGradInput::GetMomentum() const { return this->primitive_->value.AsBNGradInput()->momentum; } - -void BNGradInput::SetEps(float eps) { this->primitive_->value.AsBNGradInput()->eps = eps; } -void BNGradInput::SetMomentum(float momentum) { this->primitive_->value.AsBNGradInput()->momentum = momentum; } -int BNGradInput::UnPackAttr(const Primitive &prim, const std::vector &inputs) { - if (this->primitive_ == nullptr) { - this->primitive_ = new (std::nothrow) schema::PrimitiveT; - if (this->primitive_ == nullptr) { - MS_LOG(ERROR) << "new primitiveT failed"; - return RET_ERROR; - } - this->primitive_->value.type = schema::PrimitiveType_BNGradInput; - } - if (this->primitive_->value.type != schema::PrimitiveType_BNGradInput) { - MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; - return RET_ERROR; - } - if (this->primitive_->value.value == nullptr) { - auto attr = new (std::nothrow) schema::BNGradInputT(); - if (attr == nullptr) { - MS_LOG(ERROR) << "new primitiveT value failed"; - return RET_ERROR; - } - attr->eps = GetValue(prim.GetAttr("eps")); - attr->momentum = GetValue(prim.GetAttr("momentum")); - this->primitive_->value.value = attr; - if (this->primitive_->value.value == nullptr) { - MS_LOG(ERROR) << "primitive value is nullptr"; - return RET_ERROR; - } - } - return RET_OK; -} -#else -int BNGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { - MS_ASSERT(nullptr != primitive); - MS_ASSERT(nullptr != fbb); - auto attr = primitive->value_as_BNGradInput(); - if (attr == nullptr) { - MS_LOG(ERROR) << "value_as_BNGradInputInput return nullptr"; - return RET_ERROR; - } - auto val_offset = schema::CreateBNGradInput(*fbb, attr->eps(), attr->momentum()); - auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BNGradInput, val_offset.o); - fbb->Finish(prim_offset); - return RET_OK; -} -float BNGradInput::GetEps() const { return this->primitive_->value_as_BNGradInput()->eps(); } -float BNGradInput::GetMomentum() const { return this->primitive_->value_as_BNGradInput()->momentum(); } - -#endif -} // namespace lite -} // namespace mindspore diff --git a/mindspore/lite/src/ops/bn_grad_input.h b/mindspore/lite/src/ops/bn_grad_input.h deleted file mode 100644 index 52645f83f50..00000000000 --- a/mindspore/lite/src/ops/bn_grad_input.h +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ -#define LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ - -#include -#include -#include -#include "ir/dtype/type_id.h" -#include "src/ops/primitive_c.h" - -namespace mindspore { -namespace lite { -class BNGradInput : public PrimitiveC { - public: -#ifdef PRIMITIVE_WRITEABLE - MS_DECLARE_PARENT(BNGradInput, PrimitiveC); - BNGradInput() = default; - explicit BNGradInput(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} - void SetEps(float eps); - void SetMomentum(float momentum); - int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; -#else - BNGradInput() = default; - int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; -#endif - float GetEps() const; - float GetMomentum() const; -}; -} // namespace lite -} // namespace mindspore - -#endif // LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_ diff --git a/mindspore/lite/src/ops/conv2d_grad_filter.cc b/mindspore/lite/src/ops/conv2d_grad_filter.cc index 96c6274808b..5004b48ab54 100644 --- a/mindspore/lite/src/ops/conv2d_grad_filter.cc +++ b/mindspore/lite/src/ops/conv2d_grad_filter.cc @@ -66,108 +66,7 @@ void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsCon void Conv2DGradFilter::SetActivationType(int activation_type) { this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type; } -void Conv2DGradFilter::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, - const std::vector &inputs) { - auto attr = std::make_unique(); - auto format = GetValue(prim.GetAttr("data_format")); - if (format == "NCHW") { - attr->format = schema::Format_NCHW; - } else if (format == "NHWC") { - attr->format = schema::Format_NHWC; - } else { - attr->format = schema::Format_NUM_OF_FORMAT; - } - auto pad_list = GetValue>(prim.GetAttr("pad_list")); - attr->padUp = pad_list[0]; - attr->padDown = pad_list[1]; - attr->padLeft = pad_list[2]; - attr->padRight = pad_list[3]; - auto dilation = GetValue>(prim.GetAttr("dilation")); - attr->dilateH = dilation[0]; - attr->dilateW = dilation[1]; - - auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); - attr->kernelH = kernel_size[0]; - attr->kernelW = kernel_size[1]; - - auto stride = GetValue>(prim.GetAttr("stride")); - attr->strideH = stride[2]; - attr->strideW = stride[3]; - - auto pad_mode = GetValue(prim.GetAttr("pad_mode")); - if (pad_mode == "valid") { - attr->padMode = schema::PadMode_VALID; - } else if (pad_mode == "same") { - attr->padMode = schema::PadMode_SAME; - } else { - attr->padMode = schema::PadMode_NOTSET; - } - - if (prim.GetAttr("activation_name") != nullptr) { - std::string activate_name = GetValue(prim.GetAttr("activation_name")); - attr->activationType = kActivationTypeMap[activate_name]; - } else { - attr->activationType = schema::ActivationType_NO_ACTIVATION; - } - - int channel_mutiplier = 1; - if (prim.GetAttr("channel_mutiplier") != nullptr) { - channel_mutiplier = GetValue(prim.GetAttr("channel_multiplier")); - } - attr->channelMultiplier = channel_mutiplier; - primitive->value.value = attr.release(); -} - -void Conv2DGradFilter::PopulaterConv2DSingleGroup(const Primitive &prim, - schema::PrimitiveT *primitive, const int &group) { - auto attr = std::make_unique(); - attr->group = group; - auto format = GetValue(prim.GetAttr("data_format")); - if (format == "NCHW") { - attr->format = schema::Format_NCHW; - } else if (format == "NHWC") { - attr->format 
= schema::Format_NHWC; - } else { - attr->format = schema::Format_NUM_OF_FORMAT; - } - auto pad_list = GetValue>(prim.GetAttr("pad_list")); - attr->padUp = pad_list[0]; - attr->padDown = pad_list[1]; - attr->padLeft = pad_list[2]; - attr->padRight = pad_list[3]; - - auto dilation = GetValue>(prim.GetAttr("dilation")); - attr->dilateH = dilation[0]; - attr->dilateW = dilation[1]; - - auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); - attr->kernelH = kernel_size[0]; - attr->kernelW = kernel_size[1]; - - auto stride = GetValue>(prim.GetAttr("stride")); - attr->strideH = stride[2]; - attr->strideW = stride[3]; - - attr->channelOut = GetValue(prim.GetAttr("out_channel")); - - auto pad_mode = GetValue(prim.GetAttr("pad_mode")); - if (pad_mode == "valid") { - attr->padMode = schema::PadMode_VALID; - } else if (pad_mode == "same") { - attr->padMode = schema::PadMode_SAME; - } else { - attr->padMode = schema::PadMode_NOTSET; - } - - if (prim.GetAttr("activation_name") != nullptr) { - std::string activate_name = GetValue(prim.GetAttr("activation_name")); - attr->activationType = kActivationTypeMap[activate_name]; - } else { - attr->activationType = schema::ActivationType_NO_ACTIVATION; - } - primitive->value.value = attr.release(); -} int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { this->primitive_ = new (std::nothrow) schema::PrimitiveT; @@ -181,11 +80,62 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vectorprimitive_->value.type; return RET_ERROR; } - int group = GetValue(prim.GetAttr("group")); - if (group > 1) { - PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs); - } else { - PopulaterConv2DSingleGroup(prim, this->primitive_, group); + + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::Conv2DGradFilterT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + attr->group = GetValue(prim.GetAttr("group")); + auto format = GetValue(prim.GetAttr("data_format")); + if (format == "NCHW") { + attr->format = schema::Format_NCHW; + } else if (format == "NHWC") { + attr->format = schema::Format_NHWC; + } else { + attr->format = schema::Format_NUM_OF_FORMAT; + } + auto pad_list = GetValue>(prim.GetAttr("pad_list")); + attr->padUp = pad_list[0]; + attr->padDown = pad_list[1]; + attr->padLeft = pad_list[2]; + attr->padRight = pad_list[3]; + + auto dilation = GetValue>(prim.GetAttr("dilation")); + attr->dilateH = dilation[0]; + attr->dilateW = dilation[1]; + + auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); + attr->kernelH = kernel_size[0]; + attr->kernelW = kernel_size[1]; + + auto stride = GetValue>(prim.GetAttr("stride")); + attr->strideH = stride[0]; + attr->strideW = stride[1]; + + attr->channelOut = GetValue(prim.GetAttr("out_channel")); + auto pad_mode = GetValue(prim.GetAttr("pad_mode")); + if (pad_mode == "valid") { + attr->padMode = schema::PadMode_VALID; + } else if (pad_mode == "same") { + attr->padMode = schema::PadMode_SAME; + } else { + attr->padMode = schema::PadMode_NOTSET; + } + + if (prim.GetAttr("activation_name") != nullptr) { + std::string activate_name = GetValue(prim.GetAttr("activation_name")); + attr->activationType = kActivationTypeMap[activate_name]; + } else { + attr->activationType = schema::ActivationType_NO_ACTIVATION; + } + + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; 
+ return RET_ERROR; + } } return RET_OK; } @@ -268,6 +218,5 @@ int Conv2DGradFilter::InferShape(std::vector inputs, std::vector &inputs) override; - void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, - const std::vector &inputs); - void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group); #else Conv2DGradFilter() = default; diff --git a/mindspore/lite/src/ops/conv2d_grad_input.cc b/mindspore/lite/src/ops/conv2d_grad_input.cc index 03e5c9b55ab..cd2c2753ce7 100644 --- a/mindspore/lite/src/ops/conv2d_grad_input.cc +++ b/mindspore/lite/src/ops/conv2d_grad_input.cc @@ -64,108 +64,7 @@ void Conv2DGradInput::SetHasBias(bool has_bias) { this->primitive_->value.AsConv void Conv2DGradInput::SetActivationType(int activation_type) { this->primitive_->value.AsConv2DGradInput()->activationType = (schema::ActivationType)activation_type; } -void Conv2DGradInput::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, - const std::vector &inputs) { - auto attr = std::make_unique(); - auto format = GetValue(prim.GetAttr("data_format")); - if (format == "NCHW") { - attr->format = schema::Format_NCHW; - } else if (format == "NHWC") { - attr->format = schema::Format_NHWC; - } else { - attr->format = schema::Format_NUM_OF_FORMAT; - } - auto pad_list = GetValue>(prim.GetAttr("pad_list")); - attr->padUp = pad_list[0]; - attr->padDown = pad_list[1]; - attr->padLeft = pad_list[2]; - attr->padRight = pad_list[3]; - auto dilation = GetValue>(prim.GetAttr("dilation")); - attr->dilateH = dilation[0]; - attr->dilateW = dilation[1]; - - auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); - attr->kernelH = kernel_size[0]; - attr->kernelW = kernel_size[1]; - - auto stride = GetValue>(prim.GetAttr("stride")); - attr->strideH = stride[2]; - attr->strideW = stride[3]; - - auto pad_mode = GetValue(prim.GetAttr("pad_mode")); - if (pad_mode == "valid") { - attr->padMode = schema::PadMode_VALID; - } else if (pad_mode == "same") { - attr->padMode = schema::PadMode_SAME; - } else { - attr->padMode = schema::PadMode_NOTSET; - } - - if (prim.GetAttr("activation_name") != nullptr) { - std::string activate_name = GetValue(prim.GetAttr("activation_name")); - attr->activationType = kActivationTypeMap[activate_name]; - } else { - attr->activationType = schema::ActivationType_NO_ACTIVATION; - } - - int channel_mutiplier = 1; - if (prim.GetAttr("channel_mutiplier") != nullptr) { - channel_mutiplier = GetValue(prim.GetAttr("channel_multiplier")); - } - attr->channelMultiplier = channel_mutiplier; - primitive->value.value = attr.release(); -} - -void Conv2DGradInput::PopulaterConv2DSingleGroup(const Primitive &prim, - schema::PrimitiveT *primitive, const int &group) { - auto attr = std::make_unique(); - attr->group = group; - auto format = GetValue(prim.GetAttr("data_format")); - if (format == "NCHW") { - attr->format = schema::Format_NCHW; - } else if (format == "NHWC") { - attr->format = schema::Format_NHWC; - } else { - attr->format = schema::Format_NUM_OF_FORMAT; - } - auto pad_list = GetValue>(prim.GetAttr("pad_list")); - attr->padUp = pad_list[0]; - attr->padDown = pad_list[1]; - attr->padLeft = pad_list[2]; - attr->padRight = pad_list[3]; - - auto dilation = GetValue>(prim.GetAttr("dilation")); - attr->dilateH = dilation[0]; - attr->dilateW = dilation[1]; - - auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); - attr->kernelH = kernel_size[0]; - attr->kernelW = kernel_size[1]; - - 
auto stride = GetValue>(prim.GetAttr("stride")); - attr->strideH = stride[2]; - attr->strideW = stride[3]; - - attr->channelOut = GetValue(prim.GetAttr("out_channel")); - - auto pad_mode = GetValue(prim.GetAttr("pad_mode")); - if (pad_mode == "valid") { - attr->padMode = schema::PadMode_VALID; - } else if (pad_mode == "same") { - attr->padMode = schema::PadMode_SAME; - } else { - attr->padMode = schema::PadMode_NOTSET; - } - - if (prim.GetAttr("activation_name") != nullptr) { - std::string activate_name = GetValue(prim.GetAttr("activation_name")); - attr->activationType = kActivationTypeMap[activate_name]; - } else { - attr->activationType = schema::ActivationType_NO_ACTIVATION; - } - primitive->value.value = attr.release(); -} int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector &inputs) { if (this->primitive_ == nullptr) { this->primitive_ = new (std::nothrow) schema::PrimitiveT; @@ -179,11 +78,63 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vectorprimitive_->value.type; return RET_ERROR; } - int group = GetValue(prim.GetAttr("group")); - if (group > 1) { - PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs); - } else { - PopulaterConv2DSingleGroup(prim, this->primitive_, group); + + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::Conv2DGradInputT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + attr->group = GetValue(prim.GetAttr("group")); + auto format = GetValue(prim.GetAttr("data_format")); + if (format == "NCHW") { + attr->format = schema::Format_NCHW; + } else if (format == "NHWC") { + attr->format = schema::Format_NHWC; + } else { + attr->format = schema::Format_NUM_OF_FORMAT; + } + auto pad_list = GetValue>(prim.GetAttr("pad_list")); + attr->padUp = pad_list[0]; + attr->padDown = pad_list[1]; + attr->padLeft = pad_list[2]; + attr->padRight = pad_list[3]; + + auto dilation = GetValue>(prim.GetAttr("dilation")); + attr->dilateH = dilation[0]; + attr->dilateW = dilation[1]; + + auto kernel_size = GetValue>(prim.GetAttr("kernel_size")); + attr->kernelH = kernel_size[0]; + attr->kernelW = kernel_size[1]; + + auto stride = GetValue>(prim.GetAttr("stride")); + attr->strideH = stride[0]; + attr->strideW = stride[1]; + + attr->channelOut = GetValue(prim.GetAttr("out_channel")); + + auto pad_mode = GetValue(prim.GetAttr("pad_mode")); + if (pad_mode == "valid") { + attr->padMode = schema::PadMode_VALID; + } else if (pad_mode == "same") { + attr->padMode = schema::PadMode_SAME; + } else { + attr->padMode = schema::PadMode_NOTSET; + } + + if (prim.GetAttr("activation_name") != nullptr) { + std::string activate_name = GetValue(prim.GetAttr("activation_name")); + attr->activationType = kActivationTypeMap[activate_name]; + } else { + attr->activationType = schema::ActivationType_NO_ACTIVATION; + } + + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } } return RET_OK; } @@ -265,6 +216,5 @@ int Conv2DGradInput::InferShape(std::vector inputs, std::vector &inputs) override; - void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group, - const std::vector &inputs); - void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group); #else Conv2DGradInput() = default; diff --git a/mindspore/lite/src/ops/depend.cc b/mindspore/lite/src/ops/depend.cc index 
5176313f63b..fbddb1456dc 100644 --- a/mindspore/lite/src/ops/depend.cc +++ b/mindspore/lite/src/ops/depend.cc @@ -47,6 +47,15 @@ int Depend::UnPackAttr(const Primitive &prim, const std::vector &inp } return RET_OK; } +#else +int Depend::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { + MS_ASSERT(nullptr != primitive); + MS_ASSERT(nullptr != fbb); + auto val_offset = schema::CreateDepend(*fbb); + auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Depend, val_offset.o); + fbb->Finish(prim_offset); + return RET_OK; +} #endif } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/depend.h b/mindspore/lite/src/ops/depend.h index 01ee755e0e3..909c4299350 100644 --- a/mindspore/lite/src/ops/depend.h +++ b/mindspore/lite/src/ops/depend.h @@ -31,9 +31,10 @@ class Depend : public PrimitiveC { int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; #else Depend() = default; + int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; #endif }; } // namespace lite } // namespace mindspore -#endif // LITE_MINDSPORE_LITE_SRC_OPS_Depend_H_ +#endif // LITE_MINDSPORE_LITE_SRC_OPS_DEPEND_H_ diff --git a/mindspore/lite/src/ops/detection_post_process.h b/mindspore/lite/src/ops/detection_post_process.h index 8fecba00279..d2d92c72753 100644 --- a/mindspore/lite/src/ops/detection_post_process.h +++ b/mindspore/lite/src/ops/detection_post_process.h @@ -66,7 +66,6 @@ class DetectionPostProcess : public PrimitiveC { bool GetUseRegularNms() const; bool GetOutQuantized() const; }; - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/pooling_grad.cc b/mindspore/lite/src/ops/pooling_grad.cc index 2e687db019a..eed0f2d5f24 100644 --- a/mindspore/lite/src/ops/pooling_grad.cc +++ b/mindspore/lite/src/ops/pooling_grad.cc @@ -80,9 +80,9 @@ int PoolingGrad::UnPackAttr(const Primitive &prim, const std::vector } else { attr->format = schema::Format_NUM_OF_FORMAT; } - if (prim.instance_name() == "MaxPool") { + if (prim.instance_name() == "MaxPoolGrad") { attr->poolingMode = schema::PoolMode_MAX_POOLING; - } else if (prim.instance_name() == "MeanPool") { + } else if (prim.instance_name() == "MeanPoolGrad") { attr->poolingMode = schema::PoolMode_MEAN_POOLING; } @@ -189,6 +189,5 @@ int PoolingGrad::InferShape(std::vector inputs_, std::vector grad_output->SetFormat(input->GetFormat()); return RET_OK; } - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc index 69cf3f34f5c..364742ad8c4 100644 --- a/mindspore/lite/src/ops/primitive_c.cc +++ b/mindspore/lite/src/ops/primitive_c.cc @@ -139,7 +139,6 @@ #include "src/ops/power_grad.h" #include "src/ops/softmax_cross_entropy.h" #include "src/ops/bn_grad.h" -#include "src/ops/bn_grad_input.h" #include "src/ops/arithmetic_grad.h" #include "src/ops/depend.h" #include "src/ops/flatten_grad.h" @@ -392,49 +391,42 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Log") { return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "Conv2DBackpropInput") { + } else if (op_type == "DeConv2D") { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "tuple_getitem") { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Softmax") { return NewPrimitiveC(prim, inputs, quantType); -#ifdef 
SUPPORT_TRAIN0 + +#ifdef SUPPORT_TRAIN + } else if (op_type == "SoftmaxCrossEntropyWithLogits") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "BiasAddGrad") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "ApplyMomentum") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "Depend") { + return NewPrimitiveC(prim, inputs, quantType); } else if ((op_type == "ReluGrad" || op_type == "Relu6Grad" || op_type == "SigmoidGrad")) { return NewPrimitiveC(prim, inputs, quantType); } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Conv2DBackpropFilter") { return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "BiasAddGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "ApplyMomentum") { - return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "Conv2DBackpropInput") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "BatchNormGrad") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "FlattenGrad") { + return NewPrimitiveC(prim, inputs, quantType); +#endif +#ifdef SUPPORT_TRAIN0 + } else if (op_type == "PowerGrad") { + return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "NegGrad") { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "LogGrad") { return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "BatchNormGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "Conv2DGradInput") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "Conv2DGradFilter") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "BiasGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "ActivationGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "PoolingGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "BNGradInput") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "PowerGrad") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "SoftmaxCrossEntropyWithLogits") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "Depend") { - return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "FlattenGrad") { - return NewPrimitiveC(prim, inputs, quantType); #endif } else { MS_LOG(ERROR) << "Unsupported primitive type in Create : " << op_type; @@ -677,12 +669,10 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { return new ArithmeticGrad(primitive); case schema::PrimitiveType_DivGrad: return new ArithmeticGrad(primitive); - case schema::PrimitiveType_PowerGrad: - return new PowerGrad(primitive); - case schema::PrimitiveType_BNGradInput: - return new BNGradInput(primitive); case schema::PrimitiveType_SoftmaxCrossEntropy: return new SoftmaxCrossEntropy(primitive); + case schema::PrimitiveType_PowerGrad: + return new PowerGrad(primitive); case schema::PrimitiveType_Depend: return new Depend(primitive); case schema::PrimitiveType_FlattenGrad: @@ -934,7 +924,9 @@ PrimitiveC *PrimitiveC::Create(const schema::Primitive *primitive) { case schema::PrimitiveType_MulGrad: return NewPrimitiveC(primitive); case schema::PrimitiveType_DivGrad: - return NewPrimitiveC(primitive); + return NewPrimitiveC(primitive); + case 
schema::PrimitiveType_SoftmaxCrossEntropy: + return NewPrimitiveC(primitive); case schema::PrimitiveType_NegGrad: return NewPrimitiveC(primitive); case schema::PrimitiveType_LogGrad: diff --git a/mindspore/lite/src/ops/softmax_cross_entropy.cc b/mindspore/lite/src/ops/softmax_cross_entropy.cc index db7ef4c9fe7..6b56286ecd5 100644 --- a/mindspore/lite/src/ops/softmax_cross_entropy.cc +++ b/mindspore/lite/src/ops/softmax_cross_entropy.cc @@ -43,6 +43,8 @@ int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vectoraxis = {0}; this->primitive_->value.value = attr; if (this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "primitive value is nullptr"; @@ -102,6 +104,5 @@ int SoftmaxCrossEntropy::InferShape(std::vector inputs, std::vectorHeight() * weight_tensor->Width(), channel); - auto bias_tensor = in_tensors_[kBiasIndex]; bias_data_ = reinterpret_cast(malloc(channel * sizeof(float))); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; @@ -59,6 +58,7 @@ int ConvolutionDepthwiseCPUKernel::InitWeightBias() { memset(bias_data_, 0, channel * sizeof(float)); if (in_tensors_.size() == kInputSize2) { + auto bias_tensor = in_tensors_[kBiasIndex]; auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc index 0f62d583eb5..2be4b722082 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc @@ -63,6 +63,30 @@ int FusedBatchnormCPUKernel::InitConstTensor() { return RET_OK; } +int FusedBatchnormCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Prepare fail! 
Ret error code: " << ret; + return ret; + } + auto param = reinterpret_cast(op_parameter_); + if (is_train()) { + float *in = static_cast(in_tensors_[0]->MutableData()); + float *run_mean = static_cast(out_tensors_[1]->MutableData()); + float *run_var = static_cast(out_tensors_[2]->MutableData()); + float *save_mean = static_cast(out_tensors_[3]->MutableData()); + float *save_inv_var = static_cast(out_tensors_[4]->MutableData()); + std::fill(run_mean, run_mean+param->channel_, 0.f); + std::fill(run_var, run_var+param->channel_, 0.f); + FusedBatchNormFp32MeanVar(in, 0.9, run_mean, run_var, param, save_mean, save_inv_var); + } + ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; + } + return ret; +} + int FusedBatchnormCPUKernel::DoExecute(int task_id) { auto param = reinterpret_cast(op_parameter_); FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h index a60de876999..476c4fb0aed 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h @@ -30,7 +30,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } int ReSize() override; - + int Run() override; int InitConstTensor() override; int DoExecute(int task_id) override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc index 3e040940e0f..9908c738dbf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc @@ -186,10 +186,10 @@ int MatmulCPUKernel::Run() { auto b_src = reinterpret_cast(in_tensors_[1]->data_c()); auto c_src = reinterpret_cast(out_tensors_[0]->data_c()); - if (params_->a_const_ == false) { + if (params_->a_const_ == false || is_train()) { InitMatrixA(a_src, a_c12_ptr_); } - if (params_->b_const_ == false) { + if (params_->b_const_ == false || is_train()) { InitMatrixB(b_src, b_r8_ptr_); } @@ -201,4 +201,16 @@ int MatmulCPUKernel::Run() { } return RET_OK; } + +void MatmulCPUKernel::eval() { + // Copy weights after training + LiteKernel::eval(); + if (params_->a_const_ == true) { + InitMatrixA(reinterpret_cast(in_tensors_[0]->MutableData()), a_c12_ptr_); + } + if (params_->b_const_ == true) { + InitMatrixB(reinterpret_cast(in_tensors_[1]->MutableData()), b_r8_ptr_); + } +} + } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h index f310371a622..a7cbaceb572 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.h @@ -34,6 +34,8 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel { int ReSize() override; int Run() override; int RunImpl(int task_id); + void eval() override; + private: void InitMatrixA(float *src_ptr, float *dst_ptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h index 920d0343151..b4cd554af57 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h @@ -28,7 +28,7 @@ 
class ActivationGradCPUKernel : public LiteKernel { explicit ActivationGradCPUKernel(OpParameter *param, const std::vector &inputs, const std::vector &outputs, const lite::Context *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(param, inputs, outputs, ctx, primitive) { + : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) { param_act_grad_ = reinterpret_cast(param); } ~ActivationGradCPUKernel() override = default; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc index 0265a26b424..c2f8abc39f3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc @@ -76,7 +76,7 @@ kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector(opParameter), inputs, outputs, ctx, primitive); + new (std::nothrow) BiasGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { MS_LOG(ERROR) << "new BiasGradCPUKernel fail!"; return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc index 00d57b65f97..cbb78c3d32b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc @@ -56,7 +56,7 @@ OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) { int BNGradCPUKernel::Init() { auto *input_x = in_tensors_.at(1); int channels = input_x->shape().at(kNHWC_C); - workspace_size = 5 * channels; + workspace_size = 4 * channels; workspace = new (std::nothrow) float[workspace_size]; if (workspace == nullptr) { MS_LOG(ERROR) << "new workspace fail!"; @@ -89,9 +89,8 @@ int BNGradCPUKernel::Run() { std::fill(workspace, workspace + workspace_size, 0.f); float *mean = workspace; float *invar = mean + channels; - float *mean_delta = invar + channels; - float *variance_delta = mean_delta + channels; - float *mean_add_delta = variance_delta + channels; + float *dxhat_sum = invar + channels; + float *dxhathat_sum = dxhat_sum + channels; float *x = reinterpret_cast(input_x->MutableData()); float *yt = reinterpret_cast(input_yt->MutableData()); @@ -100,13 +99,7 @@ int BNGradCPUKernel::Run() { float *dscale = reinterpret_cast(output_scale->MutableData()); float *dbias = reinterpret_cast(output_bias->MutableData()); - std::copy(yt, yt + batch * channels * spatial, dx); - meanVar(x, batch, spatial, channels, eps, mean, invar); - scaleBias(scale, batch, channels, spatial, dx); - meanDelta(dx, spatial, channels, invar, mean_delta); - varianceDelta(x, dx, mean, invar, batch, channels, spatial, variance_delta); - meanAdd(x, mean, variance_delta, batch, channels, spatial, mean_add_delta, mean_delta); - NormalizeDelta(x, mean, invar, mean_delta, variance_delta, batch, channels, spatial, dx); + backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx); // dbias sumSpatialBatch(yt, batch * spatial, channels, dbias); // dscale diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h index c01ade2bf96..827e4b88e16 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h @@ -29,7 +29,7 @@ class BNGradCPUKernel : public LiteKernel { const std::vector &outputs, const lite::Context *ctx, const mindspore::lite::PrimitiveC 
*primitive) : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~BNGradCPUKernel() override { delete workspace; } + ~BNGradCPUKernel() override { delete [] workspace; } int Init() override; int ReSize() override; @@ -39,8 +39,5 @@ class BNGradCPUKernel : public LiteKernel { float *workspace; int workspace_size; }; - -// OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive); - } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc index b27a264ef73..f84e0f3ab96 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc @@ -41,10 +41,12 @@ int ConvolutionTrainCPUKernel::Init() { conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H); conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W); + conv_param_->group_ = (conv_param_->group_ == 0)? conv_param_->input_channel_:conv_param_->group_; + int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_; - workspace = new float[ws_size]; + workspace = new (std::nothrow) float[ws_size]; return RET_OK; } @@ -103,7 +105,7 @@ kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector +#include "src/kernel_registry.h" +#include "nnacl/softmax_parameter.h" +#include "nnacl/fp32/softmax.h" +#include "src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h" +#include "include/errorcode.h" + +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_SoftmaxCrossEntropy; + +namespace mindspore::kernel { + +int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; } + +void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits, + float *grads, float *output2) const { + float eps = 1e-6; + float total_loss = 0.0; + if (grads != nullptr) { + for (int i = 0; i < param_->batch_size_; ++i) { + for (size_t j = 0; j < param_->number_of_classes_; ++j) { + float logit = + -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]); + grads[i * param_->number_of_classes_ + j] = + (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j])/param_->batch_size_; + total_loss += labels[i * param_->number_of_classes_ + j] * logit; + } + } + } else { + for (int i = 0; i < param_->batch_size_; ++i) { + for (size_t j = 0; j < param_->number_of_classes_; ++j) { + float logit = + -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? 
eps : logits[i * param_->number_of_classes_ + j]); + total_loss += labels[i * param_->number_of_classes_ + j] * logit; + } + } + } + output2[0] = total_loss / param_->batch_size_; +} + +#if 0 +void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses, + float *output) const { + float total_loss = 0; + for (int i = 0; i < param_->batch_size_; ++i) { + if (labels[i] < 0) { + MS_LOG(EXCEPTION) << "label value must >= 0"; + } + size_t label = labels[i]; + if (label > param->number_of_classes_) { + MS_LOG(EXCEPTION) << "error label input!"; + } else { + total_loss -= logf(losses[i * param->number_of_classes_ + label]); + } + } + output[0] = total_loss / param->batch_size_; +} + +void SoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads, + float *output) const { + size_t row_start = 0; + float total_loss = 0; + for (int i = 0; i < param->batch_size_; ++i) { + if (labels[i] < 0) { + MS_LOG(EXCEPTION) << "label value must >= 0"; + } + size_t label = labels[i]; + if (label > param->number_of_classes_) { + MS_LOG(EXCEPTION) << "error label input!"; + } else { + total_loss -= logf(losses[i * param->number_of_classes_ + label]); + for (size_t j = 0; j < param->number_of_classes_; ++j) { + size_t index = row_start + j; + if (j == label) { + grads[index] = (losses[index] - 1) / param->batch_size_; + } else { + grads[index] = losses[index] / param->batch_size_; + } + } + } + row_start += param->number_of_classes_; + } + output[0] = total_loss / param->batch_size_; +} +#endif + +int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Prepare failed."; + return ret; + } + + auto ins = reinterpret_cast(in_tensors_.at(0)->MutableData()); + auto labels = reinterpret_cast(in_tensors_.at(1)->MutableData()); + float *out = reinterpret_cast(out_tensors_.at(0)->MutableData()); + float *grads = NULL; + if (is_train() && out_tensors_.size() > 1) { + grads = reinterpret_cast(out_tensors_.at(1)->MutableData()); + } + size_t data_size = in_tensors_.at(0)->ElementsNum(); + float *losses = new (std::nothrow) float[data_size]; + if (losses == nullptr) { + MS_LOG(ERROR) << "losses is null"; + return RET_ERROR; + } + + MS_ASSERT(out != nullptr); + MS_ASSERT(labels != nullptr); + MS_ASSERT(ins != nullptr); + std::fill(losses_, losses_ + data_size, 0); + std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0); + Softmax(ins, losses_, sum_data_, &sm_params_); + ForwardPostExecute(labels, losses_, grads, out); + return RET_OK; +} + +int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() { + auto dims = in_tensors_[0]->shape(); + param_->n_dim_ = 2; + param_->number_of_classes_ = dims[1]; + param_->batch_size_ = dims[0]; + for (unsigned int i = 0; i < dims.size(); i++) param_->input_shape_[i] = dims[i]; + if (2 != this->in_tensors_.size()) { + MS_LOG(ERROR) << "softmax entropy loss should have two inputs"; + return RET_ERROR; + } + auto *in0 = in_tensors_.front(); + if (in0 == nullptr) { + MS_LOG(ERROR) << "softmax etropy loss in0 have no data"; + return RET_ERROR; + } + + size_t data_size = in_tensors_.at(0)->ElementsNum(); + losses_ = new (std::nothrow) float[data_size]; + sum_data_ = new (std::nothrow) float[dims[0]]; + MS_ASSERT(losses_ != nullptr); + MS_ASSERT(sum_data_ != nullptr); + + sm_params_.n_dim_ = 2; + sm_params_.element_size_ = data_size; + sm_params_.axis_ = 1; + for (size_t i = 0; i < dims.size(); i++) sm_params_.input_shape_[i] = dims[i]; + + 
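// Worked example (illustrative, not part of the patch): for one sample (batch_size_ = 1)
// with two classes, softmax probabilities p = {0.7f, 0.3f} and one-hot labels y = {1.f, 0.f},
// ForwardPostExecute() above produces
//   grads = {(0.7 - 1) / 1, (0.3 - 0) / 1} = {-0.3f, 0.3f}
//   loss  = -(1 * logf(0.7f) + 0 * logf(0.3f)) / 1 ≈ 0.357f
// i.e. the gradient is (softmax - label) / batch_size and the loss is the mean cross
// entropy; the eps clamp only guards logf() against zero probabilities.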
return RET_OK; +} + +kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy); + auto *kernel = + new (std::nothrow) SoftmaxCrossEntropyWithLogitsCPUKernel(opParameter, inputs, outputs, ctx, primitive); + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Init(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + // REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h new file mode 100644 index 00000000000..6eaeb5a4d11 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h @@ -0,0 +1,62 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ + +#include +#include "src/train/loss_kernel.h" +#include "ir/anf.h" +#include "nnacl/fp32_grad/softmax_grad.h" +#include "nnacl/fp32/arithmetic.h" +#include "nnacl/softmax_parameter.h" + +namespace mindspore::kernel { + +class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel { + public: + explicit SoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter, + const std::vector &inputs, + const std::vector &outputs, + const lite::Context *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LossKernel(parameter, inputs, outputs, ctx, primitive) { + param_ = reinterpret_cast(parameter); + } + ~SoftmaxCrossEntropyWithLogitsCPUKernel() override { + delete[] losses_; + delete[] sum_data_; + } + + void ForwardPostExecute(const float *labels, const float *logits, + float *output1, float *output2) const; + // void ForwardPostExecute(const int *labels, const float *losses, float *output) const; + // void GradPostExecute(const int *labels, const float *losses, float* grads, float *output) const; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + SoftmaxCrossEntropyParameter *param_; + SoftmaxParameter sm_params_; + float *losses_ = nullptr; + float *sum_data_ = nullptr; +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc new file mode 100644 index 00000000000..cb09b077f72 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/runtime/kernel/arm/fp32_grad/softmax_grad.h" +#include +#include +#include "nnacl/fp32_grad/softmax_grad.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" + +// using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +// using mindspore::schema::PrimitiveType_SoftMaxGrad; + +namespace mindspore::kernel { +int SoftmaxGradCPUKernel::Init() { + // auto input_tensor =in_tensors_.at(0); + + param = reinterpret_cast(op_parameter_); + auto in_shape = in_tensors_.at(0)->shape(); + auto in_dims = in_shape.size(); + int ele_size = 1; + param->n_dim_ = in_dims; + for (size_t i = 0; i < in_dims; i++) { + param->input_shape_[i] = in_shape[i]; + ele_size *= in_shape[i]; + } + param->element_size_ = ele_size; + + // malloc tmp buffer + auto axis = param->axis_; + if ((axis < -1) || (axis > param->n_dim_)) { + MS_LOG(ERROR) << "SoftmaxGrad axis is invalid!"; + } else if (axis == -1) { + axis = param->axis_ = (in_dims - 1); + } + + int inner_size = 1; + for (size_t i = axis + 1; i < in_dims; i++) { + inner_size *= in_shape[i]; + } + + sum_data_ = new (std::nothrow) float[inner_size]; + MS_ASSERT(sum_data_ != nullptr); + sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]]; + MS_ASSERT(sum_mul_ != nullptr); + return RET_OK; +} + +int SoftmaxGradCPUKernel::ReSize() { return RET_OK; } + +int SoftmaxGradCPUKernel::Run() { + // auto input_ptr = reinterpret_cast(in_tensors_.at(kInputIndex)->MutableData()); + auto input_ptr = reinterpret_cast(in_tensors_.at(kInputIndex)->MutableData()); + auto yt_ptr = reinterpret_cast(in_tensors_.at(1)->MutableData()); + auto output_ptr = reinterpret_cast(out_tensors_.at(kOutputIndex)->MutableData()); + SoftmaxGrad(input_ptr, yt_ptr, output_ptr, sum_data_, sum_mul_, reinterpret_cast(op_parameter_)); + return RET_OK; +} + +kernel::LiteKernel *CpuSoftmaxGradFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::Context *ctx, + const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + MS_ASSERT(opParameter != nullptr); + // MS_ASSERT(desc.type == schema::PrimitiveType_SoftMaxGrad); + auto *kernel = new (std::nothrow) SoftmaxGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "new SoftmaxGradCPUKernel fail!"; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +// REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftMaxGrad, CpuSoftmaxGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h new file mode 100644 index 00000000000..c35271a2f1e --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ + +#include +#include "src/lite_kernel.h" +#include "nnacl/softmax_parameter.h" + + +namespace mindspore::kernel { +class SoftmaxGradCPUKernel : public LiteKernel { + public: + explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::Context *ctx, + const lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { + param = reinterpret_cast(parameter); + } + ~SoftmaxGradCPUKernel() override = default; + + int Init() override; + int ReSize() override; + int Run() override; + + private: + SoftmaxParameter *param; + float *sum_data_ = nullptr; + float *sum_mul_ = nullptr; +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_ + diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc index 3b03661df79..b9d38ad9b86 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc @@ -143,7 +143,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { return RET_OK; } -kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, +kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::Context *ctx, const kernel::KernelKey &desc, @@ -163,5 +163,5 @@ kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector
  • data_type_ = srcTensor.data_type_; this->shape_ = srcTensor.shape_; this->category_ = srcTensor.category_; + this->format_ = srcTensor.format_; if (copyData) { auto ret = CopyTensorData(srcTensor); if (0 != ret) { diff --git a/mindspore/lite/src/train/train_populate_parameter.cc b/mindspore/lite/src/train/train_populate_parameter.cc index b2766fd9914..3c059f997da 100644 --- a/mindspore/lite/src/train/train_populate_parameter.cc +++ b/mindspore/lite/src/train/train_populate_parameter.cc @@ -27,6 +27,8 @@ #include "nnacl/conv_parameter.h" #include "src/ops/power_grad.h" #include "nnacl/power_parameter.h" +#include "src/ops/bias_grad.h" +#include "nnacl/arithmetic_common.h" namespace mindspore::kernel { @@ -36,7 +38,7 @@ OpParameter *DefaultPopulateParameter(const mindspore::lite::PrimitiveC *primiti return nullptr; } - OpParameter *param = new (std::nothrow) OpParameter(); + OpParameter *param = reinterpret_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { MS_LOG(ERROR) << "new Param for primitive failed."; return nullptr; @@ -51,7 +53,8 @@ OpParameter *PopulateSoftmaxCrossEntropyParameter(const mindspore::lite::Primiti MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; return nullptr; } - SoftmaxCrossEntropyParameter *sce_param = new (std::nothrow) SoftmaxCrossEntropyParameter(); + SoftmaxCrossEntropyParameter *sce_param = reinterpret_cast + (malloc(sizeof(SoftmaxCrossEntropyParameter))); if (sce_param == nullptr) { MS_LOG(ERROR) << "new SoftmaxCrossEntropyParameter failed."; return nullptr; @@ -65,7 +68,7 @@ OpParameter *PopulatePoolingGradParameter(const mindspore::lite::PrimitiveC *pri MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; return nullptr; } - PoolingParameter *pooling_param = new (std::nothrow) PoolingParameter(); + PoolingParameter *pooling_param = reinterpret_cast(malloc(sizeof(PoolingParameter))); if (pooling_param == nullptr) { MS_LOG(ERROR) << "new PoolingParameter failed."; return nullptr; @@ -118,7 +121,7 @@ OpParameter *PopulateActivationGradParameter(const mindspore::lite::PrimitiveC * return nullptr; } - ActivationParameter *act_param = new (std::nothrow) ActivationParameter(); + ActivationParameter *act_param = reinterpret_cast(malloc(sizeof(ActivationParameter))); if (act_param == nullptr) { MS_LOG(ERROR) << "new ActivationParameter failed."; return nullptr; @@ -137,7 +140,7 @@ OpParameter *PopulateConvolutionGradFilterParameter(const mindspore::lite::Primi return nullptr; } - ConvParameter *param = new (std::nothrow) ConvParameter(); + ConvParameter *param = reinterpret_cast(malloc(sizeof(ConvParameter))); if (param == nullptr) { MS_LOG(ERROR) << "new Param for conv grad filter failed."; return nullptr; @@ -178,7 +181,7 @@ OpParameter *PopulateConvolutionGradInputParameter(const mindspore::lite::Primit return nullptr; } - ConvParameter *param = new (std::nothrow) ConvParameter(); + ConvParameter *param = reinterpret_cast(malloc(sizeof(ConvParameter))); if (param == nullptr) { MS_LOG(ERROR) << "new Param for conv grad filter failed."; return nullptr; @@ -219,7 +222,7 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primi return nullptr; } - PowerParameter *power_param = new (std::nothrow) PowerParameter(); + PowerParameter *power_param = reinterpret_cast(malloc(sizeof(PowerParameter))); if (power_param == nullptr) { MS_LOG(ERROR) << "new PowerParameter failed."; return nullptr; @@ -232,10 +235,25 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC 
*primi return reinterpret_cast(power_param); } +OpParameter *PopulateBiasGradParameter(const mindspore::lite::PrimitiveC *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; + return nullptr; + } + + ArithmeticParameter *arithmetic_param = reinterpret_cast(malloc(sizeof(ArithmeticParameter))); + if (arithmetic_param == nullptr) { + MS_LOG(ERROR) << "new ArithmeticParameter failed."; + return nullptr; + } + arithmetic_param->op_parameter_.type_ = primitive->Type(); + return reinterpret_cast(arithmetic_param); +} + void PopulateTrainParameters() { auto ppr = PopulateParameterRegistry::GetInstance(); ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, DefaultPopulateParameter); - ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateArithmetic); + ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter); ppr->AddPopulateParameterFunc(schema::PrimitiveType_SoftmaxCrossEntropy, PopulateSoftmaxCrossEntropyParameter); ppr->AddPopulateParameterFunc(schema::PrimitiveType_ActivationGrad, PopulateActivationGradParameter); ppr->AddPopulateParameterFunc(schema::PrimitiveType_TupleGetItem, DefaultPopulateParameter); diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc index 7d49c2518e7..315042502aa 100644 --- a/mindspore/lite/src/train/train_session.cc +++ b/mindspore/lite/src/train/train_session.cc @@ -35,6 +35,10 @@ void TrainSession::ReplaceOps() { mindspore::lite::KernelRegistrar tmp(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, mindspore::schema::PrimitiveType_Conv2D, mindspore::kernel::CpuConvTrainFp32KernelCreator); + + mindspore::lite::KernelRegistrar tmp0(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, + mindspore::schema::PrimitiveType_DepthwiseConv2D, + mindspore::kernel::CpuConvTrainFp32KernelCreator); } int TrainSession::CompileGraph(lite::Model *model) { @@ -124,5 +128,4 @@ std::vector TrainSession::GetOutputsByName(const std::string } return ret->second; } - } // namespace mindspore::session diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc new file mode 100644 index 00000000000..ce8f7cdb537 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc @@ -0,0 +1,584 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
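// Reviewer note (illustrative sketch, not part of the patch): the Populate*Parameter changes
// in train_populate_parameter.cc switch from C++ `new` to `malloc`, presumably because
// OpParameter and the structs that embed it are plain C structs that the lite runtime
// releases with free(), so mixing `new` with free() would be undefined behaviour. A minimal
// sketch of the pattern, modelled on PopulateBiasGradParameter above:
//   auto *p = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
//   if (p == nullptr) return nullptr;                      // caller logs the failure
//   memset(p, 0, sizeof(ArithmeticParameter));             // hypothetical: not in the patch itself
//   p->op_parameter_.type_ = primitive->Type();
//   return reinterpret_cast<OpParameter *>(p);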
+ */ +#include +#include +#include +#include "utils/log_adapter.h" +#include "common/common_test.h" +#include "src/common/file_utils.h" +#include "src/common/file_utils_ext.h" +#include "nnacl/fp32/reduce.h" +#include "src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h" +#include "src/kernel_registry.h" +#include "src/ops/arithmetic_grad.h" + +#ifdef PRIMITIVE_WRITEABLE +namespace mindspore { + +ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type, + std::vector inputs, + std::vector outputs) { + ArithmeticParameter *arithmetic_param = static_cast(malloc(sizeof(ArithmeticParameter))); + if (arithmetic_param == nullptr) { + MS_LOG(ERROR) << "new ArithmeticParameter failed."; + return nullptr; + } + arithmetic_param->op_parameter_.type_ = type; + schema::PrimitiveT *prim = new schema::PrimitiveT; + prim->value.type = type; + auto agrad = mindspore::lite::ArithmeticGrad(prim); + agrad.InferShape(inputs, outputs); + + arithmetic_param->ndim_ = agrad.NDims(); + for (size_t i = 0; i < agrad.dyShape().size(); i++) arithmetic_param->out_shape_[i] = (agrad.dyShape())[i]; + for (size_t i = 0; i < agrad.x1Shape().size(); i++) arithmetic_param->in_shape0_[i] = (agrad.x1Shape())[i]; + for (size_t i = 0; i < agrad.x2Shape().size(); i++) arithmetic_param->in_shape1_[i] = (agrad.x2Shape())[i]; + return arithmetic_param; +} + +class TestArithmeticGradFp32 : public mindspore::CommonTest { + public: + TestArithmeticGradFp32() {} +}; + +std::vector GenerateTensorsForTest(const char *test, int test_id) { + size_t input_size; + std::vector large_dim({4, 6}); + std::vector small_dim({6}); + int large_size = (4 * 6); + int small_size = (1 * 6); + char *dx1_file = const_cast("./test_data/operators/arithmetic_fp32_1_x1_4_6.bin"); + char *dx2_file = const_cast("./test_data/operators/arithmetic_fp32_1_x2_1_6.bin"); + + if (test_id == 7) { + large_dim = std::vector({4, 5, 6}); + small_dim = std::vector({6}); + large_size = (4 * 5 * 6); + small_size = (6); + dx1_file = const_cast("./test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin"); + dx2_file = const_cast("./test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin"); + } + if (test_id >= 8) { + large_dim = std::vector({5, 4, 6}); + small_dim = std::vector({5, 1, 6}); + large_size = (4 * 5 * 6); + small_size = (5 * 6); + dx1_file = const_cast("./test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin"); + dx2_file = const_cast("./test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin"); + } + + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(test, &input_size)); + lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + dy_tensor->SetData(dy_data); + + auto x1_data = reinterpret_cast(mindspore::lite::ReadFile(dx1_file, &input_size)); + lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + x1_tensor->SetData(x1_data); + + auto x2_data = reinterpret_cast(mindspore::lite::ReadFile(dx2_file, &input_size)); + lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + x2_tensor->SetData(x2_data); + + auto dx1_data = new float[large_size]; + lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + dx1_tensor->SetData(dx1_data); + + auto dx2_data = new float[small_size]; + lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + dx2_tensor->SetData(dx2_data); + + std::vector ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor}; + return ret_vector; +} + +TEST_F(TestArithmeticGradFp32, 
TestAddGradFp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + delete kernel_obj; + MS_LOG(INFO) << "TestAddGradFp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); + + std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; + std::vector outputs = {all_tensors[4], all_tensors[3]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; //TODO tensor data is unique pointer + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestAddGrad2Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + 
auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestAddGrad3Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_2_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_2_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestSubGradFp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3); + + std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; + std::vector outputs = {all_tensors[4], all_tensors[3]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_3_dx1_4_6.bin"; + EXPECT_EQ(0, 
lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_3_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + delete kernel_obj; + MS_LOG(INFO) << "TestSubGrad2Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + + int loop_count = 1000; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel_obj->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + printf("total cost (for %d loops): %lu us\n", loop_count, cost); + // auto time_avg = cost / loop_count; + // printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + delete kernel_obj; + // delete param; + MS_LOG(INFO) << "TestMulGradFp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); + + std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; + std::vector outputs = {all_tensors[4], all_tensors[3]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for 
(auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestMulGrad2Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestMulGrad3Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); + + std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; + std::vector outputs = {all_tensors[4], all_tensors[3]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestMulGrad4Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5); + + std::vector inputs = {all_tensors[0], 
all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/operators/arithmetic_fp32_5_dx1_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), output_path)); + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_5_dx2_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + delete kernel_obj; + // delete param; + MS_LOG(INFO) << "TestDivGradFp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6); + + std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; + std::vector outputs = {all_tensors[4], all_tensors[3]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string dx2_path = "./test_data/operators/arithmetic_fp32_6_dx2_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[1]->MutableData()), dx2_path)); + + std::string output_path = "./test_data/operators/arithmetic_fp32_6_dx1_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); + + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestDivGrad2Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr 
= reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string dx1_path = "./test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), dx1_path)); + + std::string output_path = "./test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + // for (int i = 0; i < 5; i++) delete all_tensors[i]; + // delete param; + delete kernel_obj; + MS_LOG(INFO) << "TestDivGrad3Fp32 passed"; +} + +TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { + std::vector all_tensors = + GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7); + + std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; + std::vector outputs = {all_tensors[3], all_tensors[4]}; + auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string dx1_path = "./test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), dx1_path)); + + std::string output_path = "./test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin"; + EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path)); + for (auto tensor : all_tensors) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->SetData(nullptr); + delete tensor; + } + delete kernel_obj; + MS_LOG(INFO) << "TestDivGrad2Fp32 passed"; +} + +} // namespace mindspore + +#endif diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc index f6d595ad5be..07021fc7a84 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc @@ -30,8 +30,7 @@ class TestBiasGradFp32 : public mindspore::CommonTest { TEST_F(TestBiasGradFp32, BiasGradFp32) { // prepare stage - auto bias_param = new ArithmeticParameter(); - + ArithmeticParameter* bias_param = static_cast(malloc(sizeof(ArithmeticParameter))); size_t input_size; std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc index 83fb12095ff..a44f4e710f3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc 
@@ -43,7 +43,7 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector< TEST_F(TestBNGradFp32, BNGradFp32) { // prepare stage - auto bn_param = new BNGradParameter(); + auto bn_param = static_cast(malloc(sizeof(BNGradParameter))); bn_param->epsilon_ = 0.00001; bn_param->momentum_ = 0.1; const int batch = 2; @@ -88,22 +88,24 @@ TEST_F(TestBNGradFp32, BNGradFp32) { std::cout << "==========dx==========\n"; auto dx = reinterpret_cast(outputs[0]->MutableData()); for (int i = 0; i < 7; i++) std::cout << dx[i] << " "; + std::cout << "\n"; + auto res = mindspore::lite::CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin"); std::cout << "\n=======dscale=======\n"; auto dscale = reinterpret_cast(outputs[1]->MutableData()); for (int i = 0; i < channels; i++) std::cout << dscale[i] << " "; std::cout << "\n"; - int res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); + res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); EXPECT_EQ(res, 0); std::cout << "==========dbias==========\n"; auto dbias = reinterpret_cast(outputs[2]->MutableData()); for (int i = 0; i < 3; i++) std::cout << dbias[i] << " "; std::cout << "\n"; - res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin"); + res = mindspore::lite::CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin"); EXPECT_EQ(res, 0); for (auto v : inputs) { delete[] reinterpret_cast(v->MutableData()); v->SetData(nullptr); - // delete v; + delete v; } delete kernel_obj; MS_LOG(INFO) << "BNGradFp32 passed"; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc index 64337817ef1..82ad88036dd 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc @@ -77,7 +77,7 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) { TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup1FP32(conv_param); size_t dy_size; @@ -144,7 +144,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup1FP32(conv_param); size_t dy_size; @@ -211,7 +211,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup3FP32(conv_param); size_t dy_size; @@ -277,7 +277,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup3FP32(conv_param); size_t dy_size; @@ -344,7 +344,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); 
InitConvParamGroup3Dilation2FP32(conv_param); @@ -410,7 +410,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup3Dilation2FP32(conv_param); size_t dy_size; @@ -476,7 +476,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { // prepare stage - auto conv_param = new ConvParameter(); + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); InitConvParamGroup3Dilation2FP32(conv_param); size_t x_size; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc index 69142bd69f8..637fce5a8c5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc @@ -73,7 +73,7 @@ class NetworkTest : public mindspore::CommonTest { // +-------------+ | // V dw(9) | // +-----------Update-----+ - +#if 0 TEST_F(NetworkTest, tuning_layer) { const int BATCH_SIZE = 32; const int NUM_CLASSES = 10; @@ -177,7 +177,7 @@ TEST_F(NetworkTest, tuning_layer) { node->name = "Momentum"; meta_graph->nodes.emplace_back(std::move(node)); } - meta_graph->inputIndex = {6, 0}; // XXX TODO why is it reverse? + meta_graph->inputIndex = {0, 6}; meta_graph->outputIndex = {5, 14}; auto input0 = std::make_unique(); @@ -209,6 +209,7 @@ TEST_F(NetworkTest, tuning_layer) { weight->data.resize(weight_size); std::copy(buf, buf + weight_size, weight->data.data()); meta_graph->allTensors.emplace_back(std::move(weight)); + delete [] buf; // tensor 3 - matmul auto input3 = std::make_unique(); input3->nodeType = schema::NodeType::NodeType_Parameter; @@ -231,6 +232,7 @@ TEST_F(NetworkTest, tuning_layer) { bias->data.resize(bias_size); std::copy(buf, buf + bias_size, bias->data.data()); meta_graph->allTensors.emplace_back(std::move(bias)); + delete [] buf; // tensor 5 - bias_add auto input5 = std::make_unique(); @@ -366,13 +368,13 @@ TEST_F(NetworkTest, tuning_layer) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - auto context = new lite::Context; - context->device_type_ = lite::DT_CPU; - context->cpu_bind_mode_ = lite::NO_BIND; - context->thread_num_ = 1; + lite::Context context; + context.device_type_ = lite::DT_CPU; + context.cpu_bind_mode_ = lite::NO_BIND; + context.thread_num_ = 1; auto session = new session::TrainSession(); ASSERT_NE(nullptr, session); - session->Init(context); + session->Init(&context); auto ret = session->CompileGraph(model); ASSERT_EQ(lite::RET_OK, ret); session->train(); @@ -392,7 +394,7 @@ TEST_F(NetworkTest, tuning_layer) { //=================================================== ASSERT_EQ(input_size, inTensor->Size()); memcpy(data, input_data, input_size); - + delete [] buf; auto labelTensor = inputs.at(1); ASSERT_NE(nullptr, labelTensor); ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum()); @@ -408,7 +410,7 @@ TEST_F(NetworkTest, tuning_layer) { ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type()); auto *outData = reinterpret_cast(outTensor->MutableData()); ASSERT_NE(nullptr, outData); - std::cout << "========================dW=====================" << std::endl; + std::cout << "==============Initial=Scores===================" << std::endl; for (int i = 0; i < 20; i++) { std::cout << outData[i] 
<< ", "; } @@ -422,27 +424,19 @@ TEST_F(NetworkTest, tuning_layer) { ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type()); outData = reinterpret_cast(outTensor->MutableData()); ASSERT_NE(nullptr, outData); - std::cout << "========================dW=====================" << std::endl; + std::cout << "==============Scores=after-single=train========" << std::endl; for (int i = 0; i < 20; i++) { std::cout << outData[i] << ", "; } -//=================================================== -#if 0 - size_t output_size; - std::string output_path = "./convfp32_out_1_28_28_32.bin"; - buf = mindspore::lite::ReadFile(output_path.c_str(), &output_size); - ASSERT_NE(nullptr, buf); - auto output_data = reinterpret_cast(buf); - ASSERT_NE(nullptr, output_data); - //=================================================== - ASSERT_EQ(output_size, runOutput->Size()); - for (size_t i = 0; i < runOutput->ElementsNum(); i++) { - ASSERT_EQ(output_data[i], outData[i]); - } -#endif - MS_LOG(INFO) << "Passed"; -} + std::string output_path = "./test_data/train/train_output_32_10.bin"; + auto error = lite::RelativeOutputError(outData, output_path); + EXPECT_LT(error, 2e-3); + MS_LOG(INFO) << "TuningLayer passed"; + delete model; + delete session; +} +#endif int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path, std::function cb) { int32_t res = 0; @@ -459,7 +453,7 @@ int32_t fileIterator(mindspore::session::TrainSession *session, const std::strin } void replaceExt(const std::string &src, std::string *dst) { *dst = src.substr(0, src.find_last_of('.')) + ".emb"; } -int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &in, const std::string &out) { +int32_t runEffNet(mindspore::lite::LiteSession *session, const std::string &in, const std::string &out) { // setup input auto inputs = session->GetInputs(); // ASSERT_EQ(inputs.size(), 1); @@ -473,14 +467,15 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string & auto input_data = reinterpret_cast(in_buf); // ASSERT_EQ(input_size, inTensor->Size()); std::copy(input_data, input_data + inTensor->ElementsNum(), data); + delete [] in_buf; // execute network session->RunGraph(); // compare outputs - auto outputs = session->GetOutputMap(); + auto outputs = session->GetOutputs(); auto output = ((outputs.begin())->second); - float *output_data = reinterpret_cast(output.at(0)->MutableData()); + float *output_data = reinterpret_cast(output->MutableData()); return mindspore::lite::CompareRelativeOutput(output_data, out.c_str()); } @@ -488,15 +483,19 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string & TEST_F(NetworkTest, efficient_net) { char *buf = nullptr; size_t net_size = 0; - std::string net = "./test_data/nets/efficientnet_b0_f.ms"; + // std::string net = "./test_data/nets/efficientnet_b0_f.ms"; + + std::string net = "./test_data/nets/effnetb0_fwd_nofuse.ms"; ReadFile(net.c_str(), &net_size, &buf); auto model = lite::Model::Import(buf, net_size); + delete [] buf; auto context = new lite::Context; context->device_type_ = lite::DT_CPU; context->cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; auto session = new mindspore::session::TrainSession(); + // auto session = new mindspore::lite::LiteSession(); ASSERT_NE(session, nullptr); auto ret = session->Init(context); ASSERT_EQ(lite::RET_OK, ret); @@ -506,7 +505,7 @@ TEST_F(NetworkTest, efficient_net) { #if 0 std::string path = "/opt/share/MiniBinEmbDataset/"; - auto res = fileIterator(session, path, 
[](mindspore::session::TrainSession *session, const std::string &in) { + auto res = fileIterator(session, path, [](mindspore::lite::LiteSession *session, const std::string &in) { int32_t res = 0; if (in.find(".bin") != std::string::npos) { std::string out; @@ -549,6 +548,9 @@ TEST_F(NetworkTest, efficient_net) { // float* output_data = reinterpret_cast(output.at(0)->MutableData()); // int res = lite::CompareRelativeOutput(output_data, output_path); ASSERT_EQ(res, 0); + delete model; + delete session; + delete context; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc index 3a9b9a2238d..e834ae8670b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc @@ -54,11 +54,12 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) { pooling_param->pad_l_ = 1; pooling_param->pad_r_ = 1; pooling_param->thread_num_ = 1; + pooling_param->global_ = false; } TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { // prepare stage - auto pooling_param = new PoolingParameter(); + auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->pool_mode_ = PoolMode_AvgPool; @@ -95,20 +96,21 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { } std::cout << std::endl; std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; - lite::CompareOutput(output_data, output_path); + auto res = lite::CompareOutput(output_data, output_path); + EXPECT_EQ(res, 0); delete[] input_data; delete[] output_data; - delete pooling_param; + free(pooling_param); MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed"; } TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { // prepare stage - auto pooling_param = new PoolingParameter(); + auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pooling_param); - pooling_param->output_channel_ = 3; + pooling_param->pool_mode_ = PoolMode_AvgPool; // runtime part printf("Calculating runtime cost...\n"); @@ -150,7 +152,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { } std::cout << std::endl; std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin"; - lite::CompareOutput(output_data, output_path); + auto res = lite::CompareOutput(output_data, output_path); + EXPECT_EQ(res, 0); delete[] input_data; delete[] input1_data; @@ -165,38 +168,36 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { // prepare stage - auto pooling_param = new PoolingParameter(); + auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->input_batch_ = 3; pooling_param->output_batch_ = 3; + pooling_param->pool_mode_ = PoolMode_AvgPool; // runtime part printf("Calculating runtime cost...\n"); // uint64_t time_avg = 0; - size_t output_data_size = - pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->input_h_ * pooling_param->input_w_; - size_t input_size; std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - std::vector dim_dy({1, 28, 28, 3}); 
+ std::vector dim_dy({3, 28, 28, 3}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.SetData(input_data); std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin"; auto input1_data = reinterpret_cast(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); - std::vector dim_x({1, 28, 28, 3}); + std::vector dim_x({3, 28, 28, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.SetData(input1_data); std::vector inputs = {&dy_tensor, &x_tensor}; - auto output_data = new float[output_data_size]; - std::vector dim_dx({1, 28, 28, 3}); + std::vector dim_dx({3, 28, 28, 3}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); - dx_tensor.SetData(output_data); + dx_tensor.MallocData(); + auto output_data = reinterpret_cast(dx_tensor.MutableData()); std::vector outputs = {&dx_tensor}; kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; @@ -212,12 +213,11 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { } std::cout << std::endl; std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_3_28_28_3.bin"; - lite::CompareOutput(output_data, output_path); + auto res = lite::CompareOutput(output_data, output_path); + EXPECT_EQ(res, 0); delete[] input_data; delete[] input1_data; - delete[] output_data; - dx_tensor.SetData(nullptr); x_tensor.SetData(nullptr); dy_tensor.SetData(nullptr); // delete pooling_param; @@ -228,7 +228,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward - auto pool = new PoolingParameter(); + auto pool = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pool); pool->output_channel_ = 3; pool->pool_mode_ = PoolMode_AvgPool; @@ -240,7 +240,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { pool->stride_w_ = 2; size_t input_size; - size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_; auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); @@ -253,11 +252,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { std::vector dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.SetData(yt_data); - - auto out_data = new float[y_data_size]; lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); - out_tensor.SetData(out_data); - + out_tensor.MallocData(); + float *out_data = static_cast(out_tensor.MutableData()); std::vector inputs = {&yt_tensor, &x_tensor}; std::vector outputs = {&out_tensor}; // ---------------------------------------- @@ -274,7 +271,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); delete[] x_data; @@ -283,7 +279,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { // delete conv_param; x_tensor.SetData(nullptr); yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); delete kernel; MS_LOG(INFO) << "AvgPoolGradStride2Fp32 Filter Grad passed"; } @@ -291,7 +286,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { // prepare stage // input size will be 
equal to the original size of x, output size will be the output size as in forward - auto pool = new PoolingParameter(); + auto pool = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pool); pool->output_channel_ = 3; pool->pool_mode_ = PoolMode_AvgPool; @@ -303,7 +298,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { pool->stride_w_ = 3; size_t input_size; - size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_; auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); @@ -317,9 +311,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.SetData(yt_data); - auto out_data = new float[y_data_size]; lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); - out_tensor.SetData(out_data); + out_tensor.MallocData(); + auto out_data = static_cast(out_tensor.MutableData()); std::vector inputs = {&yt_tensor, &x_tensor}; std::vector outputs = {&out_tensor}; @@ -346,14 +340,13 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { // delete conv_param; x_tensor.SetData(nullptr); yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); delete kernel; MS_LOG(INFO) << "AvgPoolGradStride3Fp32 Filter Grad passed"; } TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { // prepare stage - auto pooling_param = new PoolingParameter(); + auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->pool_mode_ = PoolMode_MaxPool; @@ -395,10 +388,11 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { } std::cout << std::endl; std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_1_28_28_3.bin"; - lite::CompareOutput(output_data, output_path); + auto res = lite::CompareOutput(output_data, output_path); + EXPECT_EQ(res, 0); + free(pooling_param); delete[] in_data; - delete pooling_param; delete[] dy_data; delete[] dx_data; delete[] output_data; @@ -526,7 +520,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) { TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward - auto maxpool = new PoolingParameter(); + auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->pool_mode_ = PoolMode_MaxPool; @@ -534,7 +528,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { maxpool->output_batch_ = 3; size_t input_size; - size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size)); @@ -553,10 +546,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.SetData(yt_data); - auto out_data = new float[y_data_size]; lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); - out_tensor.SetData(out_data); - + out_tensor.MallocData(); + auto out_data = static_cast(out_tensor.MutableData()); std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; // ---------------------------------------- @@ -585,7 +577,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { x_tensor.SetData(nullptr); 
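A minimal sketch of the two resource-handling conventions these pooling-grad tests converge on. The field values are only the ones already used above; the assumption that the kernel infrastructure releases its OpParameter with free() is why malloc replaces new here:

    // 1) OpParameter-derived structs are plain C structs: allocate with malloc, release with
    //    free (by the test, or by the kernel when it takes ownership), never with new/delete.
    auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
    InitPoolingParamFP32(pooling_param);
    pooling_param->output_channel_ = 3;
    pooling_param->pool_mode_ = PoolMode_AvgPool;
    // ... build tensors, create and run the kernel ...
    free(pooling_param);

    // 2) Output tensors allocate their own buffer instead of adopting a raw new[] array,
    //    so the trailing delete[] / SetData(nullptr) pair disappears.
    std::vector<int> dim_dx = {3, 28, 28, 3};
    lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
    dx_tensor.MallocData();
    auto *output_data = reinterpret_cast<float *>(dx_tensor.MutableData());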
y_tensor.SetData(nullptr); yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); delete kernel; MS_LOG(INFO) << "MaxPoolGradBatchFp32 Filter Grad passed"; } @@ -593,7 +584,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward - auto maxpool = new PoolingParameter(); + auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->input_channel_ = 3; @@ -606,7 +597,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { maxpool->stride_w_ = 2; size_t input_size; - size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); @@ -625,9 +615,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.SetData(yt_data); - auto out_data = new float[y_data_size]; lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); - out_tensor.SetData(out_data); + out_tensor.MallocData(); + auto out_data = static_cast(out_tensor.MutableData()); std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; @@ -657,7 +647,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { x_tensor.SetData(nullptr); y_tensor.SetData(nullptr); yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); delete kernel; MS_LOG(INFO) << "MaxPoolGradStride2Fp32 Filter Grad passed"; } @@ -665,7 +654,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward - auto maxpool = new PoolingParameter(); + auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->input_channel_ = 3; @@ -678,7 +667,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { maxpool->stride_w_ = 3; size_t input_size; - size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_; auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); @@ -697,9 +685,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.SetData(yt_data); - auto out_data = new float[y_data_size]; lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x); - out_tensor.SetData(out_data); + out_tensor.MallocData(); + auto out_data = static_cast(out_tensor.MutableData()); std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; @@ -729,7 +717,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { x_tensor.SetData(nullptr); y_tensor.SetData(nullptr); yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); delete kernel; MS_LOG(INFO) << "MaxPoolGradStride3Fp32 Filter Grad passed"; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc new file mode 100644 index 00000000000..7d8c766b6ac --- /dev/null +++ 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc @@ -0,0 +1,696 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +// #include "mindspore/lite/src/ir/tensor.h" +// #include "mindspore/lite/src/lite_kernel.h" + +#include "mindspore/lite/include/context.h" +#include "utils/log_adapter.h" +#include "common/common_test.h" +#include "mindspore/lite/src/kernel_registry.h" +#include "src/common/utils.h" +#include "src/common/file_utils.h" +#include "src/common/file_utils_ext.h" + +#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h" +#include "mindspore/lite/nnacl/fp32_grad/softmax_grad.h" + +namespace mindspore { +class TestSoftmaxGradFp32 : public mindspore::CommonTest { + public: + TestSoftmaxGradFp32() {} +}; + +void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis) { + softmax_param->axis_ = axis; + softmax_param->element_size_ = 1188; + softmax_param->n_dim_ = 4; + softmax_param->input_shape_[0] = 1; + softmax_param->input_shape_[1] = 9; + softmax_param->input_shape_[2] = 11; + softmax_param->input_shape_[3] = 12; +} + +void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, int h, int w) { + softmax_param->axis_ = axis; + softmax_param->element_size_ = n * c * h * w; + softmax_param->n_dim_ = 4; + softmax_param->input_shape_[0] = n; + softmax_param->input_shape_[1] = c; + softmax_param->input_shape_[2] = h; + softmax_param->input_shape_[3] = w; +} + +#if 0 // kernel testing +TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis0) { + auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); + // set parameters + InitSoftMaxParam(softmax_param, 0); + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); + input_tensor.SetData(input_data); + + std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); + yt_tensor.SetData(yt_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); + out_tensor.SetData(out_data); + + std::vector inputs = {&input_tensor, &yt_tensor}; + std::vector outputs = {&out_tensor}; + + // float sum_data[6]; + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); + + kernel->Init(); 
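For orientation, a condensed sketch of the registry lookup every kernel-level test in this file repeats. inputs, outputs and softmax_param are the objects built above; the argument annotations are inferred from the creator signature, and the ownership note is an assumption based on the commented-out delete of softmax_param in these tests:

    kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad};
    auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
    auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param),
                           nullptr /*context*/, desc, nullptr /*primitive*/);
    kernel->Init();    // prepare once
    kernel->Run();     // run as often as the benchmark loop needs
    // ...
    delete kernel;     // softmax_param is not freed separately afterwards in these tests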
+ + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + input_tensor.SetData(nullptr); + yt_tensor.SetData(nullptr); + out_tensor.SetData(nullptr); + delete kernel; + // delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradKernelAxis0 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis1) { + auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); + // set parameters + InitSoftMaxParam(softmax_param, 1); + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); + input_tensor.SetData(input_data); + + std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); + yt_tensor.SetData(yt_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); + out_tensor.SetData(out_data); + + std::vector inputs = {&input_tensor, &yt_tensor}; + std::vector outputs = {&out_tensor}; + + // float sum_data[6]; + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); + + kernel->Init(); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + input_tensor.SetData(nullptr); + yt_tensor.SetData(nullptr); + out_tensor.SetData(nullptr); + delete kernel; + // delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradKernelAxis1 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis2) { + auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); + // set parameters + InitSoftMaxParam(softmax_param, 2); + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; 
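Every test in this file times the call the same way; a compact sketch of that harness, where run_once stands in for either kernel->Run() or a direct SoftmaxGrad() call:

    auto run_once = [&]() { /* kernel->Run() or SoftmaxGrad(...) */ };
    for (int i = 0; i < 3; i++) {
      run_once();                                // warm-up iterations are not timed
    }
    int loop_count = 3;
    auto time_start = mindspore::lite::GetTimeUs();
    for (int i = 0; i < loop_count; i++) {
      run_once();
    }
    auto time_end = mindspore::lite::GetTimeUs();
    uint64_t time_avg = (time_end - time_start) / loop_count;   // microseconds per run
    printf("single thread running time : %f ms\n", time_avg / 1000.0f);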
+ std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); + input_tensor.SetData(input_data); + + std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); + yt_tensor.SetData(yt_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); + out_tensor.SetData(out_data); + + std::vector inputs = {&input_tensor, &yt_tensor}; + std::vector outputs = {&out_tensor}; + + // float sum_data[6]; + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); + + kernel->Init(); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + input_tensor.SetData(nullptr); + yt_tensor.SetData(nullptr); + out_tensor.SetData(nullptr); + delete kernel; + // delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradKernelAxis2 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis3) { + auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); + // set parameters + InitSoftMaxParam(softmax_param, 3); + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); + input_tensor.SetData(input_data); + + std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); + yt_tensor.SetData(yt_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); + out_tensor.SetData(out_data); + + std::vector inputs = {&input_tensor, &yt_tensor}; + std::vector outputs = {&out_tensor}; + + // float sum_data[6]; + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), 
NULL, desc, nullptr); + + kernel->Init(); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + input_tensor.SetData(nullptr); + yt_tensor.SetData(nullptr); + out_tensor.SetData(nullptr); + delete kernel; + // delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradKernelAxis3 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxisMinus1) { + auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); + // set parameters + InitSoftMaxParam(softmax_param, -1); + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); + input_tensor.SetData(input_data); + + std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); + yt_tensor.SetData(yt_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); + out_tensor.SetData(out_data); + + std::vector inputs = {&input_tensor, &yt_tensor}; + std::vector outputs = {&out_tensor}; + + // float sum_data[6]; + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); + + kernel->Init(); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + input_tensor.SetData(nullptr); + yt_tensor.SetData(nullptr); + out_tensor.SetData(nullptr); + delete kernel; + // delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradKernelAxisMinus1 passed"; +} +#endif // kernel testing + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { + auto softmax_param = new SoftmaxParameter(); + // set parameters + InitSoftMaxParam(softmax_param, 0); + + int 
inner_size = 1; + if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; + for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { + inner_size *= softmax_param->input_shape_[i]; + } + float *sum_data = new (std::nothrow) float[inner_size]; + float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + + // warm up loop + for (int i = 0; i < 3; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_out.bin"; + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + delete[] sum_data; + delete[] sum_mul; + + delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradAxis0 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { + auto softmax_param = new SoftmaxParameter(); + // set parameters + InitSoftMaxParam(softmax_param, 1); + + int inner_size = 1; + if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; + for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { + inner_size *= softmax_param->input_shape_[i]; + } + float *sum_data = new (std::nothrow) float[inner_size]; + float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + + // warm up loop + for (int i = 0; i < 3; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin"; + // auto output_data = 
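As a quick consistency check on the buffers these tests allocate, with the shapes fixed by InitSoftMaxParam (1 x 9 x 11 x 12):

    // Workspace sizing for input_shape_ = {1, 9, 11, 12}:
    //   axis_ = 1  ->  inner_size = 11 * 12 = 132
    //                  sum_data holds 132 floats
    //                  sum_mul  holds 132 * input_shape_[1] = 132 * 9 = 1188 floats
    //   axis_ = 3  ->  inner_size = 1, sum_data holds 1 float, sum_mul holds 12 floats
    //                  (axis_ = -1 is remapped to n_dim_ - 1 = 3 first, so it sizes the same way)
    //   out_data always holds element_size_ = 1 * 9 * 11 * 12 = 1188 floats.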
reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + delete[] sum_data; + delete[] sum_mul; + + delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradAxis1 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { + auto softmax_param = new SoftmaxParameter(); + // set parameters + InitSoftMaxParam(softmax_param, 2); + + int inner_size = 1; + if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; + for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { + inner_size *= softmax_param->input_shape_[i]; + } + float *sum_data = new (std::nothrow) float[inner_size]; + float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + + // warm up loop + for (int i = 0; i < 3; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + delete[] sum_data; + delete[] sum_mul; + + delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradAxis2 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { + auto softmax_param = new SoftmaxParameter(); + // set parameters + InitSoftMaxParam(softmax_param, 3); + + int inner_size = 1; + if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; + for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { + inner_size *= softmax_param->input_shape_[i]; + } + float *sum_data = new (std::nothrow) float[inner_size]; + float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + + // warm up 
loop + for (int i = 0; i < 3; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + delete[] sum_data; + delete[] sum_mul; + + delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradAxis3 passed"; +} + +TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { + auto softmax_param = new SoftmaxParameter(); + // set parameters + InitSoftMaxParam(softmax_param, -1); + + int inner_size = 1; + if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1; + for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) { + inner_size *= softmax_param->input_shape_[i]; + } + float *sum_data = new (std::nothrow) float[inner_size]; + float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + + std::vector shape = {1, 9, 11, 12}; + size_t input_size; + std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + auto out_data = new float[softmax_param->element_size_]; + + // warm up loop + for (int i = 0; i < 3; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + + int loop_count = 3; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin"; + // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + + auto res = lite::CompareRelativeOutput(out_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] yt_data; + delete[] out_data; + delete[] sum_data; + delete[] sum_mul; + + delete softmax_param; + + MS_LOG(INFO) << "SoftmaxGradAxisMinus1 passed"; +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin new file mode 100644 index 00000000000..daf908d964d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin @@ -0,0 +1,3 @@ +nÍ*7Lº‰·é{+¸x–·.V7{ε‰7öó¤·ÓúØ8?xÓ6Ò)°7“{¹7 
Ó—¶ì'¨µ³:^7Ra%¶í7Sû÷˜ï27öó¤´Ý:×·˜ï²·DݶSûö[äض#Š–6¾|¸´[¸µšÇA¶Tü™7ÇDŠ·hËÑ6Ý:W7Ra%7ýló¶h{À·…á58"·Rúm·Ê”n5¹+¸·³;´71ÂP·Íñ¶½zÜ5/š¶¾8Tü¸?xS·öó¤·>{G·Ê”î7È]3·{N7 Ó6î:x7¼"µÂ»53¼7óºÓ6ñf’75Ïa7{N·[äض¯_»6Óû.·Â»7öó¤7:ú7ÜU7^¸Äœ‡·ÇDŠ·î:x7i|–¶ÐÇ=·(ð·:ú·¦¥57éÔù4ÎÛ/7“¼c·¾‘68é{+6ÜU· Ÿî·2:ó7 Ó7 +Œž7Sû÷ê 7•aCµèyÿ·´[87|cË·´< +8´[86ò¶Tü™·`¨´ó ö7³:Þ6# N¶!7h7»Ì·D«¢·Ã÷4·yÜ·!ä›·Óñ6ˆ;»7ê ‘78"¸(ûJ· È6ûÄ73;É7?xÓ4m1P¸nœ8 \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.ms b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.ms index 67f49ee8146..a00bbee9fbd 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.ms and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.ms differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.pb.fbs.ms b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/effnetb0_fwd_nofuse.ms similarity index 75% rename from mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.pb.fbs.ms rename to mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/effnetb0_fwd_nofuse.ms index e5a0f0cbe60..8d93c74a5e1 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/efficientnet_b0_f.pb.fbs.ms and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/effnetb0_fwd_nofuse.ms differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_out.bin new file mode 100644 index 00000000000..6a40fccef6a Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_out.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yinput.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yinput.bin new file mode 100644 index 00000000000..1a279a28c00 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yinput.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yt_input.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yt_input.bin new file mode 100644 index 00000000000..6b9c69a4430 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_-1_yt_input.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_out.bin new file mode 100644 index 00000000000..8ee2502ff02 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_out.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yinput.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yinput.bin new file mode 100644 index 00000000000..fc3fd0bc6df Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yinput.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yt_input.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yt_input.bin new file mode 100644 index 
00000000000..0853115a958 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_1_yt_input.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_out.bin new file mode 100644 index 00000000000..2794514ab02 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_out.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yinput.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yinput.bin new file mode 100644 index 00000000000..6014c438fe7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yinput.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yt_input.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yt_input.bin new file mode 100644 index 00000000000..26a5b574455 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_2_yt_input.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_out.bin new file mode 100644 index 00000000000..6a40fccef6a Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_out.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yinput.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yinput.bin new file mode 100644 index 00000000000..1a279a28c00 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yinput.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yt_input.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yt_input.bin new file mode 100644 index 00000000000..6b9c69a4430 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_3_yt_input.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_out.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_out.bin new file mode 100644 index 00000000000..4f39fbda5f7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_out.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yinput.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yinput.bin new file mode 100644 index 00000000000..72af13a1a86 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yinput.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yt_input.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yt_input.bin new file mode 100644 index 00000000000..541286c1aa7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/softmax/softmaxgrad_yt_input.bin differ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_output_32_10.bin 
b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_output_32_10.bin new file mode 100644 index 00000000000..7a2d56f2d5a Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/train/train_output_32_10.bin differ diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc index ad2c8e53b5d..6deca609070 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc @@ -56,6 +56,36 @@ void AnfExporter::RemoveIfMakeTuple(const CNodePtr &cnode) { } } +void AnfExporter::RemoveIfDepend(const CNodePtr &cnode) { + bool hasDepend = false; + std::vector inputs; + inputs.clear(); + + inputs.emplace_back(cnode->input(0)); + for (size_t i = 1; i < cnode->inputs().size(); ++i) { + AnfNodePtr inputNode = cnode->input(i); + if (!inputNode->isa()) { + inputs.emplace_back(cnode->input(i)); + continue; + } + auto dependNode = utils::cast(inputNode); + if (IsPrimitiveCNode(dependNode, schema::PrimitiveType_Depend)) { + hasDepend = true; + for (size_t j = 1; j < dependNode->inputs().size(); ++j) { + AnfNodePtr dependInputNode = dependNode->input(j); + if (dependInputNode->isa()) { + inputs.emplace_back(dependInputNode); + } + } + } else { + inputs.emplace_back(cnode->input(i)); + } + } + if (hasDepend) { + cnode->set_inputs(inputs); + } +} + int AnfExporter::ConvertQuantParam(const std::unique_ptr &meta_graph, const std::shared_ptr primitive, const std::unique_ptr &dst_node) { @@ -175,10 +205,12 @@ schema::MetaGraphT *AnfExporter::Export(const FuncGraphPtr &func_graph, bool kee return nullptr; } if (primitive_c->Type() == schema::PrimitiveType_TupleGetItem || - primitive_c->Type() == schema::PrimitiveType_MakeTuple) { + primitive_c->Type() == schema::PrimitiveType_MakeTuple || + primitive_c->Type() == schema::PrimitiveType_Depend) { continue; } RemoveIfMakeTuple(cnode); + RemoveIfDepend(cnode); auto primT = primitive_c->GetPrimitiveT(); auto node = std::make_unique(); @@ -336,9 +368,49 @@ int AnfExporter::ConvertInputValueNode(std::shared_ptr input_anode, output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); meta_graphT->allTensors.emplace_back(std::move(paramTensor)); } else if (value->isa()) { - MS_LOG(DEBUG) << "Value type is ValueSequence."; - return RET_OK; - } else { + auto valueAbstract = valueNode->abstract(); + auto abstractSequnce = utils::cast(valueAbstract); + if (abstractSequnce->isa()) { + auto abstractTuple = utils::cast(valueAbstract); + auto x_shape_data = abstractTuple->elements(); + std::vector shape; + for (std::size_t i = 0; i < abstractTuple->size(); ++i) { + auto value_track = x_shape_data[i]->GetValueTrack(); + MS_EXCEPTION_IF_NULL(value_track); + if (value_track->isa()) { + shape.push_back((GetValue(value_track))); + } else { + MS_LOG(ERROR) << "Value type is ValueSequence is not integer, it is " + << value_track->ToString() << "."; + } + } + if (shape.size()) { + auto typePtr = abstractTuple->elements()[0]->GetTypeTrack(); // abstractTuple->GetTypeTrack(); + paramTensor->dataType = typePtr->type_id(); + paramTensor->dims = {static_cast(shape.size())}; + paramTensor->nodeType = schema::NodeType_ValueNode; + paramTensor->data.resize(shape.size() * sizeof(int)); + memcpy(paramTensor->data.data(), shape.data(), shape.size() * sizeof(int)); + node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size(); + output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); + 
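To keep the exporter changes readable, a recap of what the added branches do (nothing here is additional patch content):

    // RemoveIfDepend: a consumer whose input list is {op, x, Depend(y, z)} is rewritten to
    // {op, x, y, z}; the Depend wrapper vanishes, its CNode operands are spliced in directly,
    // non-CNode operands are dropped, and nodes whose own primitive is Depend are skipped
    // during export (the added Type() check in Export above).
    //
    // ConvertInputValueNode: a ValueSequence such as the tuple (1, 28, 28, 3) becomes a 1-D
    // value tensor with dims = {4}, dataType taken from the first element's type track, and
    // data holding the four ints; the Int32Imm branch that follows does the same for plain
    // scalars with dims = {1}.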
meta_graphT->allTensors.emplace_back(std::move(paramTensor)); + } + } else { + MS_LOG(ERROR) << "Value type is ValueSequence not supported - " << valueAbstract->type_name() << "."; + } + } else if (value->isa()) { + auto valueAbstract = valueNode->abstract(); + auto abstractScalar = utils::cast(valueAbstract); + auto typePtr = abstractScalar->GetTypeTrack(); + paramTensor->dataType = typePtr->type_id(); + paramTensor->dims = {1}; + paramTensor->nodeType = schema::NodeType_ValueNode; + auto data = value->cast(); + paramTensor->data.emplace_back(data->value()); + node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size(); + output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); + meta_graphT->allTensors.emplace_back(std::move(paramTensor)); + } else { MS_LOG(ERROR) << "Not support value type , need add support."; return RET_ERROR; } diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.h b/mindspore/lite/tools/anf_exporter/anf_exporter.h index 42dcec36a6c..28cadf1cd8e 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.h +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.h @@ -36,6 +36,7 @@ class AnfExporter { int SetOpInputNode(const CNodePtr &cnode, const std::unique_ptr &meta_graphT, schema::CNodeT *fb_node); void RemoveIfMakeTuple(const CNodePtr &cnode); + void RemoveIfDepend(const CNodePtr &cnode); protected: int ConvertInputCNode(const std::shared_ptr input_anode, schema::CNodeT *output_cnode); diff --git a/mindspore/lite/tools/common/node_util.cc b/mindspore/lite/tools/common/node_util.cc index f4ec2c225d0..f750b6eda4c 100644 --- a/mindspore/lite/tools/common/node_util.cc +++ b/mindspore/lite/tools/common/node_util.cc @@ -30,6 +30,7 @@ static const std::vector nhwcOpList = { schema::PrimitiveType_Conv2DGradInput, schema::PrimitiveType_PoolingGrad, schema::PrimitiveType_BiasGrad, + schema::PrimitiveType_BNGrad, #endif schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, @@ -39,7 +40,20 @@ static const std::vector nhwcOpList = { schema::PrimitiveType_Resize, schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FusedBatchNorm, - schema::PrimitiveType_PReLU}; + schema::PrimitiveType_PReLU, + schema::PrimitiveType_BiasAdd}; + +static const std::vector nhwcOpDualInputList = { +#ifdef SUPPORT_TRAIN + schema::PrimitiveType_Conv2DGradFilter +#endif +}; + +static const std::vector nhwcOpAllInputList = { +#ifdef SUPPORT_TRAIN + schema::PrimitiveType_PoolingGrad +#endif +}; static const std::vector fp32FullOpList = { schema::PrimitiveType_Concat, schema::PrimitiveType_Add, @@ -73,6 +87,10 @@ std::vector Getfp32FullOpList() { return fp32FullOpList; std::vector GetNhwcOpList() { return nhwcOpList; } +std::vector GetNhwcDualInputOpList() { return nhwcOpDualInputList; } + +std::vector GetNhwcAllInputOpList() { return nhwcOpAllInputList; } + std::vector GetUint8NhwcOpList() { return int8NeedNhwcOpList; } std::vector GetUint8OpList() { return int8OpList; } diff --git a/mindspore/lite/tools/common/node_util.h b/mindspore/lite/tools/common/node_util.h index 7eed217d4f4..2e096a541ac 100644 --- a/mindspore/lite/tools/common/node_util.h +++ b/mindspore/lite/tools/common/node_util.h @@ -36,6 +36,8 @@ std::vector GetNhwcOpList(); std::vector GetNhwcDualInputOpList(); +std::vector GetNhwcAllInputOpList(); + std::vector Getfp32FullOpList(); std::vector GetUint8NhwcOpList(); diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc index 12d89535c19..63f47a1718d 100644 --- 
a/mindspore/lite/tools/converter/anf_transform.cc +++ b/mindspore/lite/tools/converter/anf_transform.cc @@ -40,17 +40,24 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver // fusion const_fold auto optimizer = std::make_shared(); auto pm = std::make_shared("anf fusion pass manager", false); - pm->AddPass(std::make_shared()); - pm->AddPass(std::make_shared()); - pm->AddPass(std::make_shared()); - pm->AddPass(std::make_shared(true, "conv_relu", schema::PrimitiveType_Activation, - schema::ActivationType_RELU)); - pm->AddPass(std::make_shared(true, "conv_relu6", schema::PrimitiveType_Activation, - schema::ActivationType_RELU6)); - pm->AddPass(std::make_shared( - true, "conv_tuple_relu", schema::PrimitiveType_Activation, schema::ActivationType_RELU)); - pm->AddPass(std::make_shared( - true, "conv_tuple_relu6", schema::PrimitiveType_Activation, schema::ActivationType_RELU6)); + + // for now - trainning is not supporting fuse operations + if (config != nullptr && config->trainModel == false) { + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared(true, "conv_relu", schema::PrimitiveType_Activation, + schema::ActivationType_RELU)); + pm->AddPass(std::make_shared(true, "conv_relu6", schema::PrimitiveType_Activation, + schema::ActivationType_RELU6)); + pm->AddPass(std::make_shared(true, "conv_tuple_relu", + schema::PrimitiveType_Activation, + schema::ActivationType_RELU)); + pm->AddPass(std::make_shared(true, "conv_tuple_relu6", + schema::PrimitiveType_Activation, + schema::ActivationType_RELU6)); + } + pm->AddPass(std::make_shared()); optimizer->AddPassManager(pm); FuncGraphPtr new_graph = optimizer->Optimize(old_graph); diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc index 8e17a68cfe0..833a3ae40b2 100644 --- a/mindspore/lite/tools/converter/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_flags.cc @@ -41,6 +41,8 @@ Flags::Flags() { "16"); AddFlag(&Flags::configFile, "config_file", "Configuration for post-training.", ""); AddFlag(&Flags::formatTrans, "formatTrans", "whether transform format. true | false", "true"); + AddFlag(&Flags::trainModelIn, "trainModel", "whether the model is going to be trained on device." 
+ " true | false", "false"); } int Flags::Init(int argc, const char **argv) { @@ -128,6 +130,15 @@ int Flags::Init(int argc, const char **argv) { return 1; } + + if (this->trainModelIn == "true") { + this->trainModel = true; + } else if (this->trainModelIn == "false") { + this->trainModel = false; + } else { + std::cerr << "INPUT ILLEGAL: trainModel must be true|false "; + return 1; + } return 0; } } // namespace converter diff --git a/mindspore/lite/tools/converter/converter_flags.h b/mindspore/lite/tools/converter/converter_flags.h index 0dc118c077e..7e1b8224665 100644 --- a/mindspore/lite/tools/converter/converter_flags.h +++ b/mindspore/lite/tools/converter/converter_flags.h @@ -68,6 +68,8 @@ class Flags : public virtual mindspore::lite::FlagParser { std::string configFile; bool formatTrans = true; std::string convWeightQuantChannelThreshold; + std::string trainModelIn; + bool trainModel = false; }; } // namespace converter } // namespace lite diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc index 5f644d998e1..624175c97b2 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc @@ -146,11 +146,29 @@ STATUS FormatTransPass::DoNodeInoutFormatTrans(schema::MetaGraphT *graph) { MS_LOG(ERROR) << "InsertNhwc2NchwNode before " << nodeName << "failed"; return RET_ERROR; } - - iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status); - if (status != RET_OK) { - MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed"; - return RET_ERROR; + if (IsContain(GetNhwcAllInputOpList(), GetCNodeTType(**iter))) { + int idx_num = node->inputIndex.size(); + for (int i = 0; i < idx_num; i++) { + iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status); + if (status != RET_OK) { + MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed"; + return RET_ERROR; + } + } + } else if (IsContain(GetNhwcDualInputOpList(), GetCNodeTType(**iter))) { + for (int i = 0; i < 2; i++) { + iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status); + if (status != RET_OK) { + MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed"; + return RET_ERROR; + } + } + } else { + iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status); + if (status != RET_OK) { + MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed"; + return RET_ERROR; + } } } return RET_OK;
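Finally, a recap of the per-node decision that DoNodeInoutFormatTrans now makes for layout-sensitive ops (a summary of the code above, not additional behaviour):

    // * ops in GetNhwcAllInputOpList()  (PoolingGrad, under SUPPORT_TRAIN)
    //     -> a format-transpose node is inserted before every one of the node's inputs;
    // * ops in GetNhwcDualInputOpList() (Conv2DGradFilter, under SUPPORT_TRAIN)
    //     -> a transpose is inserted before each of the first two inputs;
    // * every other op in the NHWC list
    //     -> keeps the previous behaviour of a single transpose inserted after the node.
    // Together with the NHWC-list additions above (BNGrad under SUPPORT_TRAIN, BiasAdd), this
    // gives the newly exported training ops inputs in the layout their kernels expect.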