forked from mindspore-Ecosystem/mindspore
commit
2f14c40934
|
@ -27,7 +27,6 @@ struct Model;
|
|||
}
|
||||
|
||||
namespace session {
|
||||
|
||||
class TrainSession : public lite::LiteSession {
|
||||
public:
|
||||
TrainSession();
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance,
|
||||
BatchNormParameter *param, int task_id, void *output) {
|
||||
void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
|
||||
void *output) {
|
||||
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
|
||||
int completed_units = task_id * units_per_thread;
|
||||
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
|
||||
|
@ -31,7 +31,7 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance,
|
|||
for (int c = 0; c < param->channel_; c++) {
|
||||
float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_);
|
||||
((float *)output)[cur_offset + c] =
|
||||
(((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
|
||||
(((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
|
||||
}
|
||||
cur_offset += param->channel_;
|
||||
}
|
||||
|
@ -53,3 +53,22 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset
|
|||
cur_offset += param->channel_;
|
||||
}
|
||||
}
|
||||
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var,
|
||||
BatchNormParameter *param, float *save_mean, float *save_inv_var) {
|
||||
float N = param->channel_ * param->unit_;
|
||||
for (int i = 0; i < param->unit_; i++) {
|
||||
for (int f = 0; f < param->channel_; f++) {
|
||||
int idx = i * param->channel_ + f;
|
||||
run_mean[f] += input[idx];
|
||||
run_var[f] += input[idx] * input[idx];
|
||||
}
|
||||
}
|
||||
for (int f = 0; f < param->channel_; f++) {
|
||||
run_mean[f] = run_mean[f] / N;
|
||||
run_var[f] = run_var[f] / N - run_mean[f] * run_mean[f];
|
||||
save_mean[f] = momentum * save_mean[f] + (1 - momentum) * run_mean[f];
|
||||
float inv_var = 1.f/sqrt(run_var[f]+param->epsilon_);
|
||||
save_inv_var[f] = momentum * save_inv_var[f] + (1 - momentum) * inv_var;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,8 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba
|
|||
void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
|
||||
const void *variance, BatchNormParameter *param, int task_id, void *output);
|
||||
|
||||
void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var,
|
||||
BatchNormParameter *param, float *save_mean, float *save_var);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -27,20 +27,40 @@ void sumSpatialBatch(const float *in, int size, int ch, float *out) {
|
|||
}
|
||||
}
|
||||
|
||||
void scaleBias(const float *scales, int batch, int n, int size, float *output) {
|
||||
for (int i = 0; i < batch * size; i++)
|
||||
for (int c = 0; c < n; c++) output[i * n + c] *= scales[c];
|
||||
// Computes, for each of `ch` channels, the mean and the inverse standard deviation of `in`.
//
// in    : `size` rows of `ch` floats (element index = row * ch + channel).
// eps   : added to the variance before taking the square root.
// mean  : out, per-channel mean (filled through sumSpatialBatch, then divided by size).
// invar : out, per-channel 1 / sqrt(biased variance + eps).
static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) {
  // Keep the element count as an int for indexing and for the helper call; a float counter
  // loses precision above 2^24 and would then be converted back to a different int.
  const float N = (float)size;
  sumSpatialBatch(in, size, ch, mean);
  for (int f = 0; f < ch; ++f) {
    mean[f] /= N;
  }
  for (int f = 0; f < ch; f++) {
    float tvar = 0;
    for (int i = 0; i < size; i++) {
      float x = in[i * ch + f];
      tvar += (x - mean[f]) * (x - mean[f]);
    }
    invar[f] = 1.0f / (sqrt(tvar / N + eps));
  }
}
|
||||
|
||||
void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial,
|
||||
float *out) {
|
||||
int b, f, i;
|
||||
for (b = 0; b < batch; ++b) {
|
||||
for (i = 0; i < spatial; ++i) {
|
||||
for (f = 0; f < filters; ++f) {
|
||||
int index = b * filters * spatial + i * filters + f;
|
||||
out[index] = (x[index] - mean[f]) * invar[f];
|
||||
}
|
||||
void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps,
|
||||
float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) {
|
||||
meanVar(in, size, channels, eps, mean, invar);
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int f = 0; f < channels; f++) {
|
||||
int ix = i*channels + f;
|
||||
float x_hat = (in[ix] - mean[f]) * invar[f];
|
||||
float dxhat = dout[ix] * scale[f];
|
||||
dxhat_sum[f] += dxhat;
|
||||
dxhathat_sum[f] += dxhat * x_hat;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int f = 0; f < channels; f++) {
|
||||
int ix = i*channels + f;
|
||||
float x_hat = (in[ix] - mean[f]) * invar[f];
|
||||
float dxhat = dout[ix] * scale[f];
|
||||
out[ix] = 1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -60,65 +80,3 @@ void backwardScale(const float *x, const float *mean, const float *invar, const
|
|||
}
|
||||
}
|
||||
|
||||
// Computes, for each of `ch` channels, the mean and the inverse standard deviation of `in`
// over batch * spatial rows.
//
// in    : batch * spatial rows of `ch` floats (element index = row * ch + channel).
// eps   : added to the variance before taking the square root.
// mean  : out, per-channel mean (filled through sumSpatialBatch, then divided by the row count).
// invar : out, per-channel 1 / sqrt(biased variance + eps).
void meanVar(const float *in, int batch, int spatial, int ch, float eps, float *mean, float *invar) {
  // The row count stays an int for indexing and for the helper call; only the divisor is a float.
  // A float counter loses precision above 2^24 and would be converted back to a different int.
  const int count = batch * spatial;
  const float N = (float)count;
  sumSpatialBatch(in, count, ch, mean);
  for (int f = 0; f < ch; ++f) {
    mean[f] /= N;
  }
  for (int f = 0; f < ch; f++) {
    float tvar = 0;
    for (int i = 0; i < count; i++) {
      float x = in[i * ch + f];
      tvar += (x - mean[f]) * (x - mean[f]);
    }
    invar[f] = 1.0f / (sqrt(tvar / N + eps));
  }
}
|
||||
|
||||
// Fills mean_delta through sumSpatialBatch(yt, size, ch, ...) and then multiplies each of the
// `ch` entries by -invar[channel].
void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta) {
  sumSpatialBatch(yt, size, ch, mean_delta);
  for (int c = 0; c < ch; ++c) {
    mean_delta[c] = mean_delta[c] * -invar[c];
  }
}
|
||||
|
||||
// Adds the variance-path contribution to the mean gradient.
//
// x              : spatial * batch rows of `filters` floats (element index = row * filters + channel).
// mean           : per-channel mean subtracted from x.
// variance_delta : per-channel factor applied to the centred sums.
// mean_add       : out, per channel; overwritten with
//                  sum(x - mean) * variance_delta * (-2 / (spatial * batch)).
// mean_delta     : in/out, per channel; mean_add is added to it.
void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
             float *mean_add, float *mean_delta) {
  const int rows = spatial * batch;
  memset(mean_add, 0, filters * sizeof(float));

  // Per-channel sum of the centred inputs.
  for (int row = 0; row < rows; ++row) {
    const float *x_row = x + row * filters;
    for (int c = 0; c < filters; ++c) {
      mean_add[c] += x_row[c] - mean[c];
    }
  }

  // Scale each sum and fold it into the running mean gradient.
  for (int c = 0; c < filters; ++c) {
    mean_add[c] *= variance_delta[c] * (-2.f / (spatial * batch));
    mean_delta[c] += mean_add[c];
  }
}
|
||||
|
||||
// Computes the gradient with respect to the per-channel variance.
//
// x, delta       : batch * spatial rows of `filters` floats (element index = row * filters + channel).
// mean           : per-channel mean subtracted from x.
// invar          : per-channel inverse standard deviation, 1 / sqrt(var + eps).
// variance_delta : out, per channel; overwritten with
//                  sum(delta * (x - mean)) * -0.5 * invar^3.
void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int filters,
                   int spatial, float *variance_delta) {
  int i, k;
  memset(variance_delta, 0, filters * sizeof(float));
  for (k = 0; k < batch * spatial; k++) {
    for (i = 0; i < filters; i++) {
      int index = k * filters + i;
      variance_delta[i] += delta[index] * (x[index] - mean[i]);
    }
  }
  // d(1/sqrt(var + eps))/d(var) = -0.5 * (var + eps)^(-3/2) = -0.5 * invar^3.
  // The factor must therefore be multiplied by invar^3, not divided by it.
  for (i = 0; i < filters; i++) {
    variance_delta[i] *= -.5f * (invar[i] * invar[i] * invar[i]);
  }
}
|
||||
|
||||
// Rewrites `delta` in place with the input gradient assembled from three terms:
//   delta * invar  +  variance_delta * 2 * (x - mean) / (spatial * batch)  +  mean_delta / (spatial * batch)
//
// x, delta                                : batch * spatial rows of `filters` floats
//                                           (element index = row * filters + channel).
// mean, invar, mean_delta, variance_delta : per-channel arrays of length `filters`.
void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta,
                    const float *variance_delta, int batch, int filters, int spatial, float *delta) {
  const int rows = batch * spatial;
  for (int row = 0; row < rows; ++row) {
    for (int c = 0; c < filters; ++c) {
      const int pos = row * filters + c;
      const float centered = x[pos] - mean[c];
      // The `2.` literal is a double on purpose: the middle term is evaluated in double precision.
      delta[pos] = delta[pos] * invar[c] + variance_delta[c] * 2. * centered / (spatial * batch) +
                   mean_delta[c] / (spatial * batch);
    }
  }
}
|
||||
|
|
|
@ -30,18 +30,11 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
void sumSpatialBatch(const float *in, int size, int ch, float *out);
|
||||
void scaleBias(const float *scales, int batch, int n, int size, float *output);
|
||||
void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial, float *out);
|
||||
void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n,
|
||||
int size, float *scale_updates);
|
||||
void meanVar(const float *in, int batch, int size, int ch, float eps, float *mean, float *invar);
|
||||
void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta);
|
||||
void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int ch,
|
||||
int spatial, float *variance_delta);
|
||||
void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
|
||||
float *mean_add, float *mean_delta);
|
||||
void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta,
|
||||
const float *variance_delta, int batch, int filters, int spatial, float *delta);
|
||||
void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps,
|
||||
float *mean, float *invar, float *xhat_sum, float *dxhat_sum, float *out);
|
||||
void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch,
|
||||
int n, int size, float *scale_updates);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp32_grad/softmax_grad.h"
|
||||
#include <string.h>
|
||||
#include "nnacl/fp32_grad/gemm.h"
|
||||
|
||||
void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,
|
||||
SoftmaxParameter *parameter) {
|
||||
int32_t axis = parameter->axis_;
|
||||
int n_dim = parameter->n_dim_;
|
||||
int ele_size = parameter->element_size_;
|
||||
int *input_shape = parameter->input_shape_;
|
||||
int dim = 1;
|
||||
|
||||
int inner_size = 1, outter_size = 1;
|
||||
for (int i = 0; i < axis; i++) {
|
||||
outter_size *= input_shape[i];
|
||||
}
|
||||
for (int i = axis + 1; i < n_dim; i++) {
|
||||
inner_size *= input_shape[i];
|
||||
}
|
||||
|
||||
for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0;
|
||||
for (int i = 0; i < n_dim; i++) dim *= input_shape[i];
|
||||
dim /= outter_size;
|
||||
memcpy(output_ptr, yt_ptr, ele_size * sizeof(float));
|
||||
|
||||
int M = input_shape[axis];
|
||||
int N = inner_size;
|
||||
int K = 1;
|
||||
for (int i = 0; i < outter_size; i++) {
|
||||
int outter_offset = i * dim;
|
||||
memset(sum_data, 0.0f, inner_size * sizeof(float));
|
||||
for (int k = 0; k < inner_size; k++) {
|
||||
int inner_offset = outter_offset + k;
|
||||
for (int j = 0; j < input_shape[axis]; j++) {
|
||||
int offset = inner_offset + j * inner_size;
|
||||
sum_data[k] += output_ptr[offset] * input_ptr[offset];
|
||||
}
|
||||
}
|
||||
gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ele_size; i++) {
|
||||
output_ptr[i] *= input_ptr[i];
|
||||
}
|
||||
}
|
|
@ -14,10 +14,15 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/fp32/softmax.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct SoftmaxCrossEntropyParameter {
|
||||
OpParameter op_parameter_;
|
||||
|
@ -26,4 +31,11 @@ typedef struct SoftmaxCrossEntropyParameter {
|
|||
int n_dim_;
|
||||
int input_shape_[5];
|
||||
} SoftmaxCrossEntropyParameter;
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_SOFTMAX_GRAD_H_
|
||||
|
||||
void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data,
|
||||
float *sum_mul, SoftmaxParameter *parameter);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
static int CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) {
|
||||
static float CompareOutputRelativeData(float *output_data, float *correct_data, int data_size) {
|
||||
float error = 0;
|
||||
|
||||
// relative error
|
||||
|
@ -35,6 +35,16 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in
|
|||
diffSum += diff;
|
||||
}
|
||||
error = diffSum / sum;
|
||||
return error;
|
||||
}
|
||||
|
||||
int CompareRelativeOutput(float *output_data, std::string file_path) {
|
||||
size_t output_size;
|
||||
auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
|
||||
size_t output_num = output_size / sizeof(float);
|
||||
// std::cout << "output num : " << output_num << "\n";
|
||||
int error = CompareOutputRelativeData(output_data, ground_truth, output_num);
|
||||
delete [] ground_truth;
|
||||
if (error > 1e-4) {
|
||||
std::cout << "has accuracy error!\n" << error << "\n";
|
||||
return 1;
|
||||
|
@ -42,14 +52,15 @@ static int CompareOutputRelativeData(float *output_data, float *correct_data, in
|
|||
return 0;
|
||||
}
|
||||
|
||||
int CompareRelativeOutput(float *output_data, std::string file_path) {
|
||||
float RelativeOutputError(float *output_data, std::string file_path) {
|
||||
size_t output_size;
|
||||
auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
|
||||
size_t output_num = output_size / sizeof(float);
|
||||
std::cout << "output num : " << output_num << "\n";
|
||||
int res = CompareOutputRelativeData(output_data, ground_truth, output_num);
|
||||
delete[] ground_truth;
|
||||
return res;
|
||||
float error = CompareOutputRelativeData(output_data, ground_truth, output_num);
|
||||
delete [] ground_truth;
|
||||
return error;
|
||||
}
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
namespace mindspore {
|
||||
namespace lite {
|
||||
int CompareRelativeOutput(float *output_data, std::string file_path);
|
||||
float RelativeOutputError(float *output_data, std::string file_path);
|
||||
}
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_COMMON_FILE_UTILS_EXT_H_
|
||||
|
|
|
@ -32,13 +32,16 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
static std::vector<schema::PrimitiveType> packed_op = {
|
||||
schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, schema::PrimitiveType_DepthwiseConv2D,
|
||||
schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_MatMul};
|
||||
|
||||
// this method will not check whether tensor_idx is a weight tensor index, caller should ensure this.
|
||||
static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor_idx) {
|
||||
#ifdef SUPPORT_TRAIN
|
||||
return false;
|
||||
#endif
|
||||
|
||||
MS_ASSERT(model != nullptr);
|
||||
auto post_node_idxes = GetLinkedPostNodeIdx(model, tensor_idx);
|
||||
return std::none_of(post_node_idxes.begin(), post_node_idxes.end(), [&](const size_t &post_node_idx) {
|
||||
|
@ -267,7 +270,9 @@ int LiteSession::CompileGraph(Model *model) {
|
|||
}
|
||||
|
||||
executor->Prepare(this->kernels_);
|
||||
#ifndef SUPPORT_TRAIN
|
||||
model->Free();
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,9 +42,11 @@ bool ConvertNodes(const schema::MetaGraph *meta_graph, Model *model) {
|
|||
for (uint32_t j = 0; j < count; ++j) {
|
||||
node->input_indices_.push_back(size_t(c_node->inputIndex()->GetAs<uint32_t>(j)));
|
||||
}
|
||||
count = c_node->outputIndex()->size();
|
||||
for (uint32_t j = 0; j < count; ++j) {
|
||||
node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs<uint32_t>(j)));
|
||||
if (c_node->outputIndex() != nullptr) {
|
||||
count = c_node->outputIndex()->size();
|
||||
for (uint32_t j = 0; j < count; ++j) {
|
||||
node->output_indices_.push_back(size_t(c_node->outputIndex()->GetAs<uint32_t>(j)));
|
||||
}
|
||||
}
|
||||
model->nodes_.push_back(node);
|
||||
}
|
||||
|
|
|
@ -46,6 +46,8 @@ int ActivationGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodeP
|
|||
} else if (prim.name() == "ReLU6") {
|
||||
attr->type = schema::ActivationType_RELU6;
|
||||
}
|
||||
// auto alpha = GetValue<float>(prim.GetAttr("alpha"));
|
||||
attr->alpha = 0; // alpha;
|
||||
this->primitive_->value.value = attr.release();
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
#include "src/ops/apply_momentum.h"
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
#ifdef PRIMITIVE_WRITEABLE
|
||||
int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
|
||||
if (this->primitive_ == nullptr) {
|
||||
|
@ -31,11 +30,17 @@ int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePt
|
|||
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto attr = std::make_unique<schema::ApplyMomentumT>();
|
||||
this->primitive_->value.value = attr.release();
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
auto attr = std::make_unique<schema::ApplyMomentumT>();
|
||||
if (attr == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->primitive_->value.value = attr.release();
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -49,13 +54,13 @@ int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatb
|
|||
return RET_ERROR;
|
||||
}
|
||||
auto val_offset = schema::CreateApplyMomentum(*fbb);
|
||||
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ActivationGrad, val_offset.o);
|
||||
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ApplyMomentum, val_offset.o);
|
||||
fbb->Finish(prim_offset);
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
int ApplyMomentum::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
|
||||
int ApplyMomentum::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
|
||||
if (5 != inputs.size()) {
|
||||
MS_LOG(ERROR) << "ApplyMomentum should have at 5 input tensors";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -48,6 +48,9 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<
|
|||
|
||||
if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) {
|
||||
ndim_ = outShape.size();
|
||||
x1_shape_.resize(ndim_);
|
||||
x2_shape_.resize(ndim_);
|
||||
dy_shape_.resize(ndim_);
|
||||
auto fillDimNum0 = outShape.size() - inShape0.size();
|
||||
auto fillDimNum1 = outShape.size() - inShape1.size();
|
||||
int j0 = 0;
|
||||
|
@ -61,6 +64,9 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<
|
|||
// if (inShape0.size() < inShape1.size())
|
||||
if (dx1->ElementsNum() < dx2->ElementsNum()) {
|
||||
ndim_ = inShape1.size();
|
||||
x1_shape_.resize(ndim_);
|
||||
x2_shape_.resize(ndim_);
|
||||
dy_shape_.resize(ndim_);
|
||||
auto fillDimNum = inShape1.size() - inShape0.size(); // This will not work for batch!
|
||||
int j = 0;
|
||||
for (unsigned int i = 0; i < inShape1.size(); i++) {
|
||||
|
@ -74,8 +80,10 @@ int ArithmeticGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<
|
|||
}
|
||||
} else if (dx2->ElementsNum() < dx1->ElementsNum()) { // if (inShape0.size() > inShape1.size())
|
||||
ndim_ = inShape0.size();
|
||||
x1_shape_.resize(ndim_);
|
||||
x2_shape_.resize(ndim_);
|
||||
dy_shape_.resize(ndim_);
|
||||
broadcasting_ = true;
|
||||
ndim_ = inShape0.size();
|
||||
int j = 0;
|
||||
auto fillDimNum = inShape0.size() - inShape1.size();
|
||||
for (unsigned int i = 0; i < inShape0.size(); i++) {
|
||||
|
|
|
@ -32,7 +32,7 @@ class ArithmeticGrad : public PrimitiveC {
|
|||
ArithmeticGrad() = default;
|
||||
explicit ArithmeticGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
|
||||
#else
|
||||
// explicit Arithmetic(schema::Primitive *primitive) : PrimitiveC(primitive) {}
|
||||
// explicit ArithmeticGrad(const schema::Primitive &primitive) : PrimitiveC(primitive) {}
|
||||
ArithmeticGrad() = default;
|
||||
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override {
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -41,6 +41,7 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i
|
|||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
attr->axis = {0}; // GetValue<std::vector<int>>(prim.GetAttr("axis"));
|
||||
this->primitive_->value.value = attr;
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "primitive value is nullptr";
|
||||
|
@ -73,6 +74,7 @@ std::vector<int> BiasGrad::GetAxis() const {
|
|||
auto fb_vector = this->primitive_->value_as_BiasGrad()->axis();
|
||||
return std::vector<int>(fb_vector->begin(), fb_vector->end());
|
||||
}
|
||||
#endif
|
||||
|
||||
int BiasGrad::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
|
||||
if (1 != inputs.size()) {
|
||||
|
@ -99,6 +101,5 @@ int BiasGrad::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> out
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
#endif
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -38,8 +38,8 @@ class BiasGrad : public PrimitiveC {
|
|||
BiasGrad() = default;
|
||||
|
||||
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
|
||||
int InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) override;
|
||||
#endif
|
||||
int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) override;
|
||||
std::vector<int> GetAxis() const;
|
||||
};
|
||||
} // namespace lite
|
||||
|
|
|
@ -67,9 +67,31 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers:
|
|||
fbb->Finish(prim_offset);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); }
|
||||
float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); }
|
||||
|
||||
#endif
|
||||
// Infers shape, data type and format of the three BNGrad outputs.
//
// Requires exactly five inputs and three outputs, otherwise logs and returns RET_ERROR.
// outputs[0] mirrors inputs[1]; outputs[1] and outputs[2] both mirror inputs[2].
int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
  if (inputs.size() != 5) {
    MS_LOG(ERROR) << "BNGrad should have five inputs";
    return RET_ERROR;
  }
  if (outputs.size() != 3) {
    MS_LOG(ERROR) << "BNGrad should have three outputs";
    return RET_ERROR;
  }

  // templates[i] is the input tensor that outputs[i] copies its metadata from.
  lite::Tensor *const templates[3] = {inputs[1], inputs[2], inputs[2]};
  for (size_t i = 0; i < 3; ++i) {
    outputs[i]->set_shape(templates[i]->shape());
  }
  for (size_t i = 0; i < 3; ++i) {
    outputs[i]->set_data_type(templates[i]->data_type());
  }
  for (size_t i = 0; i < 3; ++i) {
    outputs[i]->SetFormat(templates[i]->GetFormat());
  }
  return RET_OK;
}
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -38,6 +38,8 @@ class BNGrad : public PrimitiveC {
|
|||
BNGrad() = default;
|
||||
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
|
||||
#endif
|
||||
int InferShape(std::vector<lite::Tensor *> inputs_,
|
||||
std::vector<lite::Tensor *> outputs_) override;
|
||||
float GetEps() const;
|
||||
float GetMomentum() const;
|
||||
};
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/ops/bn_grad_input.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
#ifdef PRIMITIVE_WRITEABLE
|
||||
float BNGradInput::GetEps() const { return this->primitive_->value.AsBNGradInput()->eps; }
|
||||
float BNGradInput::GetMomentum() const { return this->primitive_->value.AsBNGradInput()->momentum; }
|
||||
|
||||
void BNGradInput::SetEps(float eps) { this->primitive_->value.AsBNGradInput()->eps = eps; }
|
||||
void BNGradInput::SetMomentum(float momentum) { this->primitive_->value.AsBNGradInput()->momentum = momentum; }
|
||||
int BNGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
|
||||
if (this->primitive_ == nullptr) {
|
||||
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
|
||||
if (this->primitive_ == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->primitive_->value.type = schema::PrimitiveType_BNGradInput;
|
||||
}
|
||||
if (this->primitive_->value.type != schema::PrimitiveType_BNGradInput) {
|
||||
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
auto attr = new (std::nothrow) schema::BNGradInputT();
|
||||
if (attr == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
attr->eps = GetValue<float>(prim.GetAttr("eps"));
|
||||
attr->momentum = GetValue<float>(prim.GetAttr("momentum"));
|
||||
this->primitive_->value.value = attr;
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "primitive value is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
#else
|
||||
// Re-serialises the BNGradInput attributes of `primitive` (eps, momentum) into `fbb` and finishes
// the buffer. Returns RET_ERROR when `primitive` does not carry a BNGradInput value.
int BNGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
  MS_ASSERT(nullptr != primitive);
  MS_ASSERT(nullptr != fbb);

  const auto *bn_attr = primitive->value_as_BNGradInput();
  if (bn_attr == nullptr) {
    MS_LOG(ERROR) << "value_as_BNGradInputInput return nullptr";
    return RET_ERROR;
  }

  const float eps = bn_attr->eps();
  const float momentum = bn_attr->momentum();
  auto value_offset = schema::CreateBNGradInput(*fbb, eps, momentum);
  auto primitive_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BNGradInput, value_offset.o);
  fbb->Finish(primitive_offset);
  return RET_OK;
}
|
||||
float BNGradInput::GetEps() const { return this->primitive_->value_as_BNGradInput()->eps(); }
|
||||
float BNGradInput::GetMomentum() const { return this->primitive_->value_as_BNGradInput()->momentum(); }
|
||||
|
||||
#endif
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
|
@ -1,47 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_
|
||||
#define LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "ir/dtype/type_id.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
// PrimitiveC subclass for the BNGradInput operator; exposes its eps and momentum attributes.
class BNGradInput : public PrimitiveC {
|
||||
public:
|
||||
// Writeable build: the class can wrap a mutable schema::PrimitiveT, so setters and UnPackAttr exist.
#ifdef PRIMITIVE_WRITEABLE
|
||||
MS_DECLARE_PARENT(BNGradInput, PrimitiveC);
|
||||
BNGradInput() = default;
|
||||
explicit BNGradInput(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
|
||||
// Attribute setters (only declared in the writeable build).
void SetEps(float eps);
|
||||
void SetMomentum(float momentum);
|
||||
// Fills the primitive from the attributes of `prim`; returns a status code.
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
|
||||
// Read-only build: only default construction and flatbuffer re-serialisation are declared.
#else
|
||||
BNGradInput() = default;
|
||||
// Copies the attributes of `primitive` into `fbb`; returns a status code.
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
|
||||
#endif
|
||||
// Attribute getters, declared in both build flavours.
float GetEps() const;
|
||||
float GetMomentum() const;
|
||||
};
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_
|
|
@ -66,108 +66,7 @@ void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsCon
|
|||
void Conv2DGradFilter::SetActivationType(int activation_type) {
|
||||
this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type;
|
||||
}
|
||||
void Conv2DGradFilter::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
|
||||
const std::vector<AnfNodePtr> &inputs) {
|
||||
auto attr = std::make_unique<schema::DepthwiseConv2DT>();
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[2];
|
||||
attr->strideW = stride[3];
|
||||
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
|
||||
int channel_mutiplier = 1;
|
||||
if (prim.GetAttr("channel_mutiplier") != nullptr) {
|
||||
channel_mutiplier = GetValue<int>(prim.GetAttr("channel_multiplier"));
|
||||
}
|
||||
attr->channelMultiplier = channel_mutiplier;
|
||||
primitive->value.value = attr.release();
|
||||
}
|
||||
|
||||
void Conv2DGradFilter::PopulaterConv2DSingleGroup(const Primitive &prim,
|
||||
schema::PrimitiveT *primitive, const int &group) {
|
||||
auto attr = std::make_unique<schema::Conv2DT>();
|
||||
attr->group = group;
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[2];
|
||||
attr->strideW = stride[3];
|
||||
|
||||
attr->channelOut = GetValue<int>(prim.GetAttr("out_channel"));
|
||||
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
primitive->value.value = attr.release();
|
||||
}
|
||||
int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
|
||||
if (this->primitive_ == nullptr) {
|
||||
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
|
||||
|
@ -181,11 +80,62 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNod
|
|||
MS_LOG(ERROR) << "primitive_ type is error:" << this->primitive_->value.type;
|
||||
return RET_ERROR;
|
||||
}
|
||||
int group = GetValue<int>(prim.GetAttr("group"));
|
||||
if (group > 1) {
|
||||
PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs);
|
||||
} else {
|
||||
PopulaterConv2DSingleGroup(prim, this->primitive_, group);
|
||||
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
auto attr = new (std::nothrow) schema::Conv2DGradFilterT();
|
||||
if (attr == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
attr->group = GetValue<int>(prim.GetAttr("group"));
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[0];
|
||||
attr->strideW = stride[1];
|
||||
|
||||
attr->channelOut = GetValue<int>(prim.GetAttr("out_channel"));
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
|
||||
this->primitive_->value.value = attr;
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "primitive value is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -268,6 +218,5 @@ int Conv2DGradFilter::InferShape(std::vector<Tensor *> inputs, std::vector<Tenso
|
|||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -51,9 +51,6 @@ class Conv2DGradFilter : public PrimitiveC {
|
|||
void SetHasBias(bool has_bias);
|
||||
void SetActivationType(int activation_type);
|
||||
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
|
||||
void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
|
||||
const std::vector<AnfNodePtr> &inputs);
|
||||
void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group);
|
||||
#else
|
||||
Conv2DGradFilter() = default;
|
||||
|
||||
|
|
|
@ -64,108 +64,7 @@ void Conv2DGradInput::SetHasBias(bool has_bias) { this->primitive_->value.AsConv
|
|||
void Conv2DGradInput::SetActivationType(int activation_type) {
|
||||
this->primitive_->value.AsConv2DGradInput()->activationType = (schema::ActivationType)activation_type;
|
||||
}
|
||||
void Conv2DGradInput::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
|
||||
const std::vector<AnfNodePtr> &inputs) {
|
||||
auto attr = std::make_unique<schema::DepthwiseConv2DT>();
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[2];
|
||||
attr->strideW = stride[3];
|
||||
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
|
||||
int channel_mutiplier = 1;
|
||||
if (prim.GetAttr("channel_mutiplier") != nullptr) {
|
||||
channel_mutiplier = GetValue<int>(prim.GetAttr("channel_multiplier"));
|
||||
}
|
||||
attr->channelMultiplier = channel_mutiplier;
|
||||
primitive->value.value = attr.release();
|
||||
}
|
||||
|
||||
void Conv2DGradInput::PopulaterConv2DSingleGroup(const Primitive &prim,
|
||||
schema::PrimitiveT *primitive, const int &group) {
|
||||
auto attr = std::make_unique<schema::Conv2DT>();
|
||||
attr->group = group;
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[2];
|
||||
attr->strideW = stride[3];
|
||||
|
||||
attr->channelOut = GetValue<int>(prim.GetAttr("out_channel"));
|
||||
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
primitive->value.value = attr.release();
|
||||
}
|
||||
int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
|
||||
if (this->primitive_ == nullptr) {
|
||||
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
|
||||
|
@ -179,11 +78,63 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNode
|
|||
MS_LOG(ERROR) << "primitive_ type is error:" << this->primitive_->value.type;
|
||||
return RET_ERROR;
|
||||
}
|
||||
int group = GetValue<int>(prim.GetAttr("group"));
|
||||
if (group > 1) {
|
||||
PopulaterConv2DMultiGroup(prim, this->primitive_, group, inputs);
|
||||
} else {
|
||||
PopulaterConv2DSingleGroup(prim, this->primitive_, group);
|
||||
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
auto attr = new (std::nothrow) schema::Conv2DGradInputT();
|
||||
if (attr == nullptr) {
|
||||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
attr->group = GetValue<int>(prim.GetAttr("group"));
|
||||
auto format = GetValue<std::string>(prim.GetAttr("data_format"));
|
||||
if (format == "NCHW") {
|
||||
attr->format = schema::Format_NCHW;
|
||||
} else if (format == "NHWC") {
|
||||
attr->format = schema::Format_NHWC;
|
||||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
auto pad_list = GetValue<std::vector<int>>(prim.GetAttr("pad_list"));
|
||||
attr->padUp = pad_list[0];
|
||||
attr->padDown = pad_list[1];
|
||||
attr->padLeft = pad_list[2];
|
||||
attr->padRight = pad_list[3];
|
||||
|
||||
auto dilation = GetValue<std::vector<int>>(prim.GetAttr("dilation"));
|
||||
attr->dilateH = dilation[0];
|
||||
attr->dilateW = dilation[1];
|
||||
|
||||
auto kernel_size = GetValue<std::vector<int>>(prim.GetAttr("kernel_size"));
|
||||
attr->kernelH = kernel_size[0];
|
||||
attr->kernelW = kernel_size[1];
|
||||
|
||||
auto stride = GetValue<std::vector<int>>(prim.GetAttr("stride"));
|
||||
attr->strideH = stride[0];
|
||||
attr->strideW = stride[1];
|
||||
|
||||
attr->channelOut = GetValue<int>(prim.GetAttr("out_channel"));
|
||||
|
||||
auto pad_mode = GetValue<std::string>(prim.GetAttr("pad_mode"));
|
||||
if (pad_mode == "valid") {
|
||||
attr->padMode = schema::PadMode_VALID;
|
||||
} else if (pad_mode == "same") {
|
||||
attr->padMode = schema::PadMode_SAME;
|
||||
} else {
|
||||
attr->padMode = schema::PadMode_NOTSET;
|
||||
}
|
||||
|
||||
if (prim.GetAttr("activation_name") != nullptr) {
|
||||
std::string activate_name = GetValue<std::string>(prim.GetAttr("activation_name"));
|
||||
attr->activationType = kActivationTypeMap[activate_name];
|
||||
} else {
|
||||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
|
||||
this->primitive_->value.value = attr;
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "primitive value is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -265,6 +216,5 @@ int Conv2DGradInput::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor
|
|||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -51,9 +51,6 @@ class Conv2DGradInput : public PrimitiveC {
|
|||
void SetHasBias(bool has_bias);
|
||||
void SetActivationType(int activation_type);
|
||||
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
|
||||
void PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group,
|
||||
const std::vector<AnfNodePtr> &inputs);
|
||||
void PopulaterConv2DSingleGroup(const Primitive &prim, schema::PrimitiveT *primitive, const int &group);
|
||||
#else
|
||||
Conv2DGradInput() = default;
|
||||
|
||||
|
|
|
@ -47,6 +47,15 @@ int Depend::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inp
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
#else
|
||||
// Serializes a Depend primitive into `fbb`: creates the Depend table, wraps it in a Primitive of type
// PrimitiveType_Depend, and finishes the buffer. Always returns RET_OK.
int Depend::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
  MS_ASSERT(primitive != nullptr);
  MS_ASSERT(fbb != nullptr);

  const auto depend_offset = schema::CreateDepend(*fbb);
  const auto root_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Depend, depend_offset.o);
  fbb->Finish(root_offset);

  return RET_OK;
}
|
||||
#endif
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -31,9 +31,10 @@ class Depend : public PrimitiveC {
|
|||
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
|
||||
#else
|
||||
Depend() = default;
|
||||
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
|
||||
#endif
|
||||
};
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // LITE_MINDSPORE_LITE_SRC_OPS_Depend_H_
|
||||
#endif // LITE_MINDSPORE_LITE_SRC_OPS_DEPEND_H_
|
||||
|
|
|
@ -66,7 +66,6 @@ class DetectionPostProcess : public PrimitiveC {
|
|||
bool GetUseRegularNms() const;
|
||||
bool GetOutQuantized() const;
|
||||
};
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -80,9 +80,9 @@ int PoolingGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr>
|
|||
} else {
|
||||
attr->format = schema::Format_NUM_OF_FORMAT;
|
||||
}
|
||||
if (prim.instance_name() == "MaxPool") {
|
||||
if (prim.instance_name() == "MaxPoolGrad") {
|
||||
attr->poolingMode = schema::PoolMode_MAX_POOLING;
|
||||
} else if (prim.instance_name() == "MeanPool") {
|
||||
} else if (prim.instance_name() == "MeanPoolGrad") {
|
||||
attr->poolingMode = schema::PoolMode_MEAN_POOLING;
|
||||
}
|
||||
|
||||
|
@ -189,6 +189,5 @@ int PoolingGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *>
|
|||
grad_output->SetFormat(input->GetFormat());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -139,7 +139,6 @@
|
|||
#include "src/ops/power_grad.h"
|
||||
#include "src/ops/softmax_cross_entropy.h"
|
||||
#include "src/ops/bn_grad.h"
|
||||
#include "src/ops/bn_grad_input.h"
|
||||
#include "src/ops/arithmetic_grad.h"
|
||||
#include "src/ops/depend.h"
|
||||
#include "src/ops/flatten_grad.h"
|
||||
|
@ -392,49 +391,42 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
|
|||
return NewPrimitiveC<Elu>(prim, inputs, quantType);
|
||||
} else if (op_type == "Log") {
|
||||
return NewPrimitiveC<Log>(prim, inputs, quantType);
|
||||
} else if (op_type == "Conv2DBackpropInput") {
|
||||
} else if (op_type == "DeConv2D") {
|
||||
return NewPrimitiveC<DeConv2D>(prim, inputs, quantType);
|
||||
} else if (op_type == "tuple_getitem") {
|
||||
return NewPrimitiveC<TupleGetItem>(prim, inputs, quantType);
|
||||
} else if (op_type == "Softmax") {
|
||||
return NewPrimitiveC<SoftMax>(prim, inputs, quantType);
|
||||
#ifdef SUPPORT_TRAIN0
|
||||
|
||||
#ifdef SUPPORT_TRAIN
|
||||
} else if (op_type == "SoftmaxCrossEntropyWithLogits") {
|
||||
return NewPrimitiveC<SoftmaxCrossEntropy>(prim, inputs, quantType);
|
||||
} else if (op_type == "BiasAddGrad") {
|
||||
return NewPrimitiveC<BiasGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "ApplyMomentum") {
|
||||
return NewPrimitiveC<ApplyMomentum>(prim, inputs, quantType);
|
||||
} else if (op_type == "Depend") {
|
||||
return NewPrimitiveC<Depend>(prim, inputs, quantType);
|
||||
} else if ((op_type == "ReluGrad" || op_type == "Relu6Grad" || op_type == "SigmoidGrad")) {
|
||||
return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType);
|
||||
} else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) {
|
||||
return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "Conv2DBackpropFilter") {
|
||||
return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType);
|
||||
} else if (op_type == "BiasAddGrad") {
|
||||
return NewPrimitiveC<BiasGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "ApplyMomentum") {
|
||||
return NewPrimitiveC<ApplyMomentum>(prim, inputs, quantType);
|
||||
} else if (op_type == "Conv2DBackpropInput") {
|
||||
return NewPrimitiveC<Conv2DGradInput>(prim, inputs, quantType);
|
||||
} else if (op_type == "BatchNormGrad") {
|
||||
return NewPrimitiveC<BNGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "FlattenGrad") {
|
||||
return NewPrimitiveC<FlattenGrad>(prim, inputs, quantType);
|
||||
#endif
|
||||
#ifdef SUPPORT_TRAIN0
|
||||
} else if (op_type == "PowerGrad") {
|
||||
return NewPrimitiveC<PowerGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "NegGrad") {
|
||||
return NewPrimitiveC<NegGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "LogGrad") {
|
||||
return NewPrimitiveC<LogGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "BatchNormGrad") {
|
||||
return NewPrimitiveC<BNGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "Conv2DGradInput") {
|
||||
return NewPrimitiveC<Conv2DGradInput>(prim, inputs, quantType);
|
||||
} else if (op_type == "Conv2DGradFilter") {
|
||||
return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType);
|
||||
} else if (op_type == "BiasGrad") {
|
||||
return NewPrimitiveC<BiasGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "ActivationGrad") {
|
||||
return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "PoolingGrad") {
|
||||
return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "BNGradInput") {
|
||||
return NewPrimitiveC<BNGradInput>(prim, inputs, quantType);
|
||||
} else if (op_type == "PowerGrad") {
|
||||
return NewPrimitiveC<PowerGrad>(prim, inputs, quantType);
|
||||
} else if (op_type == "SoftmaxCrossEntropyWithLogits") {
|
||||
return NewPrimitiveC<SoftmaxCrossEntropy>(prim, inputs, quantType);
|
||||
} else if (op_type == "Depend") {
|
||||
return NewPrimitiveC<Depend>(prim, inputs, quantType);
|
||||
} else if (op_type == "FlattenGrad") {
|
||||
return NewPrimitiveC<FlattenGrad>(prim, inputs, quantType);
|
||||
#endif
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported primitive type in Create : " << op_type;
|
||||
|
@ -677,12 +669,10 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
|
|||
return new ArithmeticGrad(primitive);
|
||||
case schema::PrimitiveType_DivGrad:
|
||||
return new ArithmeticGrad(primitive);
|
||||
case schema::PrimitiveType_PowerGrad:
|
||||
return new PowerGrad(primitive);
|
||||
case schema::PrimitiveType_BNGradInput:
|
||||
return new BNGradInput(primitive);
|
||||
case schema::PrimitiveType_SoftmaxCrossEntropy:
|
||||
return new SoftmaxCrossEntropy(primitive);
|
||||
case schema::PrimitiveType_PowerGrad:
|
||||
return new PowerGrad(primitive);
|
||||
case schema::PrimitiveType_Depend:
|
||||
return new Depend(primitive);
|
||||
case schema::PrimitiveType_FlattenGrad:
|
||||
|
@ -934,7 +924,9 @@ PrimitiveC *PrimitiveC::Create(const schema::Primitive *primitive) {
|
|||
case schema::PrimitiveType_MulGrad:
|
||||
return NewPrimitiveC<ArithmeticGrad>(primitive);
|
||||
case schema::PrimitiveType_DivGrad:
|
||||
return NewPrimitiveC<ArithmeticGrad>(primitive);
|
||||
return NewPrimitiveC<ArithmeticGrad>(primitive);
|
||||
case schema::PrimitiveType_SoftmaxCrossEntropy:
|
||||
return NewPrimitiveC<SoftmaxCrossEntropy>(primitive);
|
||||
case schema::PrimitiveType_NegGrad:
|
||||
return NewPrimitiveC<NegGrad>(primitive);
|
||||
case schema::PrimitiveType_LogGrad:
|
||||
|
|
|
@ -43,6 +43,8 @@ int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<Anf
|
|||
MS_LOG(ERROR) << "new primitiveT value failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
attr->axis = {0};
|
||||
this->primitive_->value.value = attr;
|
||||
if (this->primitive_->value.value == nullptr) {
|
||||
MS_LOG(ERROR) << "primitive value is nullptr";
|
||||
|
@ -102,6 +104,5 @@ int SoftmaxCrossEntropy::InferShape(std::vector<Tensor *> inputs, std::vector<Te
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -50,7 +50,6 @@ int ConvolutionDepthwiseCPUKernel::InitWeightBias() {
|
|||
}
|
||||
PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), channel);
|
||||
|
||||
auto bias_tensor = in_tensors_[kBiasIndex];
|
||||
bias_data_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
|
||||
if (bias_data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
|
@ -59,6 +58,7 @@ int ConvolutionDepthwiseCPUKernel::InitWeightBias() {
|
|||
|
||||
memset(bias_data_, 0, channel * sizeof(float));
|
||||
if (in_tensors_.size() == kInputSize2) {
|
||||
auto bias_tensor = in_tensors_[kBiasIndex];
|
||||
auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
|
||||
memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
|
||||
}
|
||||
|
|
|
@ -63,6 +63,30 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int FusedBatchnormCPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
|
||||
return ret;
|
||||
}
|
||||
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
|
||||
if (is_train()) {
|
||||
float *in = static_cast<float *>(in_tensors_[0]->MutableData());
|
||||
float *run_mean = static_cast<float *>(out_tensors_[1]->MutableData());
|
||||
float *run_var = static_cast<float *>(out_tensors_[2]->MutableData());
|
||||
float *save_mean = static_cast<float *>(out_tensors_[3]->MutableData());
|
||||
float *save_inv_var = static_cast<float *>(out_tensors_[4]->MutableData());
|
||||
std::fill(run_mean, run_mean+param->channel_, 0.f);
|
||||
std::fill(run_var, run_var+param->channel_, 0.f);
|
||||
FusedBatchNormFp32MeanVar(in, 0.9, run_mean, run_var, param, save_mean, save_inv_var);
|
||||
}
|
||||
ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int FusedBatchnormCPUKernel::DoExecute(int task_id) {
|
||||
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
|
||||
FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id,
|
||||
|
|
|
@ -30,7 +30,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
|
|||
~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); }
|
||||
|
||||
int ReSize() override;
|
||||
|
||||
int Run() override;
|
||||
int InitConstTensor() override;
|
||||
int DoExecute(int task_id) override;
|
||||
|
||||
|
|
|
@ -186,10 +186,10 @@ int MatmulCPUKernel::Run() {
|
|||
auto b_src = reinterpret_cast<float *>(in_tensors_[1]->data_c());
|
||||
auto c_src = reinterpret_cast<float *>(out_tensors_[0]->data_c());
|
||||
|
||||
if (params_->a_const_ == false) {
|
||||
if (params_->a_const_ == false || is_train()) {
|
||||
InitMatrixA(a_src, a_c12_ptr_);
|
||||
}
|
||||
if (params_->b_const_ == false) {
|
||||
if (params_->b_const_ == false || is_train()) {
|
||||
InitMatrixB(b_src, b_r8_ptr_);
|
||||
}
|
||||
|
||||
|
@ -201,4 +201,16 @@ int MatmulCPUKernel::Run() {
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void MatmulCPUKernel::eval() {
|
||||
// Copy weights after training
|
||||
LiteKernel::eval();
|
||||
if (params_->a_const_ == true) {
|
||||
InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->MutableData()), a_c12_ptr_);
|
||||
}
|
||||
if (params_->b_const_ == true) {
|
||||
InitMatrixB(reinterpret_cast<float *>(in_tensors_[1]->MutableData()), b_r8_ptr_);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -34,6 +34,8 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel {
|
|||
int ReSize() override;
|
||||
int Run() override;
|
||||
int RunImpl(int task_id);
|
||||
void eval() override;
|
||||
|
||||
|
||||
private:
|
||||
void InitMatrixA(float *src_ptr, float *dst_ptr);
|
||||
|
|
|
@ -28,7 +28,7 @@ class ActivationGradCPUKernel : public LiteKernel {
|
|||
explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LiteKernel(param, inputs, outputs, ctx, primitive) {
|
||||
: LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
|
||||
param_act_grad_ = reinterpret_cast<ActivationParameter *>(param);
|
||||
}
|
||||
~ActivationGradCPUKernel() override = default;
|
||||
|
|
|
@ -76,7 +76,7 @@ kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::Tensor
|
|||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad);
|
||||
auto *kernel =
|
||||
new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive);
|
||||
new (std::nothrow) BiasGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "new BiasGradCPUKernel fail!";
|
||||
return nullptr;
|
||||
|
|
|
@ -56,7 +56,7 @@ OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) {
|
|||
int BNGradCPUKernel::Init() {
|
||||
auto *input_x = in_tensors_.at(1);
|
||||
int channels = input_x->shape().at(kNHWC_C);
|
||||
workspace_size = 5 * channels;
|
||||
workspace_size = 4 * channels;
|
||||
workspace = new (std::nothrow) float[workspace_size];
|
||||
if (workspace == nullptr) {
|
||||
MS_LOG(ERROR) << "new workspace fail!";
|
||||
|
@ -89,9 +89,8 @@ int BNGradCPUKernel::Run() {
|
|||
std::fill(workspace, workspace + workspace_size, 0.f);
|
||||
float *mean = workspace;
|
||||
float *invar = mean + channels;
|
||||
float *mean_delta = invar + channels;
|
||||
float *variance_delta = mean_delta + channels;
|
||||
float *mean_add_delta = variance_delta + channels;
|
||||
float *dxhat_sum = invar + channels;
|
||||
float *dxhathat_sum = dxhat_sum + channels;
|
||||
|
||||
float *x = reinterpret_cast<float *>(input_x->MutableData());
|
||||
float *yt = reinterpret_cast<float *>(input_yt->MutableData());
|
||||
|
@ -100,13 +99,7 @@ int BNGradCPUKernel::Run() {
|
|||
float *dscale = reinterpret_cast<float *>(output_scale->MutableData());
|
||||
float *dbias = reinterpret_cast<float *>(output_bias->MutableData());
|
||||
|
||||
std::copy(yt, yt + batch * channels * spatial, dx);
|
||||
meanVar(x, batch, spatial, channels, eps, mean, invar);
|
||||
scaleBias(scale, batch, channels, spatial, dx);
|
||||
meanDelta(dx, spatial, channels, invar, mean_delta);
|
||||
varianceDelta(x, dx, mean, invar, batch, channels, spatial, variance_delta);
|
||||
meanAdd(x, mean, variance_delta, batch, channels, spatial, mean_add_delta, mean_delta);
|
||||
NormalizeDelta(x, mean, invar, mean_delta, variance_delta, batch, channels, spatial, dx);
|
||||
backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx);
|
||||
// dbias
|
||||
sumSpatialBatch(yt, batch * spatial, channels, dbias);
|
||||
// dscale
|
||||
|
|
|
@ -29,7 +29,7 @@ class BNGradCPUKernel : public LiteKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
~BNGradCPUKernel() override { delete workspace; }
|
||||
~BNGradCPUKernel() override { delete [] workspace; }
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
|
@ -39,8 +39,5 @@ class BNGradCPUKernel : public LiteKernel {
|
|||
float *workspace;
|
||||
int workspace_size;
|
||||
};
|
||||
|
||||
// OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive);
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_
|
||||
|
|
|
@ -41,10 +41,12 @@ int ConvolutionTrainCPUKernel::Init() {
|
|||
conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
|
||||
conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);
|
||||
|
||||
conv_param_->group_ = (conv_param_->group_ == 0)? conv_param_->input_channel_:conv_param_->group_;
|
||||
|
||||
int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ *
|
||||
conv_param_->input_channel_ / conv_param_->group_;
|
||||
|
||||
workspace = new float[ws_size];
|
||||
workspace = new (std::nothrow) float[ws_size];
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -103,7 +105,7 @@ kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor
|
|||
const lite::Context *ctx, const kernel::KernelKey &desc,
|
||||
const lite::PrimitiveC *primitive) {
|
||||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D || desc.type == schema::PrimitiveType_DepthwiseConv2D);
|
||||
|
||||
auto *kernel = new (std::nothrow) ConvolutionTrainCPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
MS_ASSERT(kernel != nullptr);
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include "src/kernel_registry.h"
|
||||
#include "nnacl/softmax_parameter.h"
|
||||
#include "nnacl/fp32/softmax.h"
|
||||
#include "src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_SoftmaxCrossEntropy;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
// Resize hook: performs no work and always reports RET_OK.
int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; }
|
||||
|
||||
void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits,
|
||||
float *grads, float *output2) const {
|
||||
float eps = 1e-6;
|
||||
float total_loss = 0.0;
|
||||
if (grads != nullptr) {
|
||||
for (int i = 0; i < param_->batch_size_; ++i) {
|
||||
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
|
||||
float logit =
|
||||
-logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]);
|
||||
grads[i * param_->number_of_classes_ + j] =
|
||||
(logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j])/param_->batch_size_;
|
||||
total_loss += labels[i * param_->number_of_classes_ + j] * logit;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < param_->batch_size_; ++i) {
|
||||
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
|
||||
float logit =
|
||||
-logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]);
|
||||
total_loss += labels[i * param_->number_of_classes_ + j] * logit;
|
||||
}
|
||||
}
|
||||
}
|
||||
output2[0] = total_loss / param_->batch_size_;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *labels, const float *losses,
|
||||
float *output) const {
|
||||
float total_loss = 0;
|
||||
for (int i = 0; i < param_->batch_size_; ++i) {
|
||||
if (labels[i] < 0) {
|
||||
MS_LOG(EXCEPTION) << "label value must >= 0";
|
||||
}
|
||||
size_t label = labels[i];
|
||||
if (label > param->number_of_classes_) {
|
||||
MS_LOG(EXCEPTION) << "error label input!";
|
||||
} else {
|
||||
total_loss -= logf(losses[i * param->number_of_classes_ + label]);
|
||||
}
|
||||
}
|
||||
output[0] = total_loss / param->batch_size_;
|
||||
}
|
||||
|
||||
void SoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads,
|
||||
float *output) const {
|
||||
size_t row_start = 0;
|
||||
float total_loss = 0;
|
||||
for (int i = 0; i < param->batch_size_; ++i) {
|
||||
if (labels[i] < 0) {
|
||||
MS_LOG(EXCEPTION) << "label value must >= 0";
|
||||
}
|
||||
size_t label = labels[i];
|
||||
if (label > param->number_of_classes_) {
|
||||
MS_LOG(EXCEPTION) << "error label input!";
|
||||
} else {
|
||||
total_loss -= logf(losses[i * param->number_of_classes_ + label]);
|
||||
for (size_t j = 0; j < param->number_of_classes_; ++j) {
|
||||
size_t index = row_start + j;
|
||||
if (j == label) {
|
||||
grads[index] = (losses[index] - 1) / param->batch_size_;
|
||||
} else {
|
||||
grads[index] = losses[index] / param->batch_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
row_start += param->number_of_classes_;
|
||||
}
|
||||
output[0] = total_loss / param->batch_size_;
|
||||
}
|
||||
#endif
|
||||
|
||||
int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto ins = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
|
||||
auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
|
||||
float *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
|
||||
float *grads = NULL;
|
||||
if (is_train() && out_tensors_.size() > 1) {
|
||||
grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
|
||||
}
|
||||
size_t data_size = in_tensors_.at(0)->ElementsNum();
|
||||
float *losses = new (std::nothrow) float[data_size];
|
||||
if (losses == nullptr) {
|
||||
MS_LOG(ERROR) << "losses is null";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
MS_ASSERT(out != nullptr);
|
||||
MS_ASSERT(labels != nullptr);
|
||||
MS_ASSERT(ins != nullptr);
|
||||
std::fill(losses_, losses_ + data_size, 0);
|
||||
std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0);
|
||||
Softmax(ins, losses_, sum_data_, &sm_params_);
|
||||
ForwardPostExecute(labels, losses_, grads, out);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
|
||||
auto dims = in_tensors_[0]->shape();
|
||||
param_->n_dim_ = 2;
|
||||
param_->number_of_classes_ = dims[1];
|
||||
param_->batch_size_ = dims[0];
|
||||
for (unsigned int i = 0; i < dims.size(); i++) param_->input_shape_[i] = dims[i];
|
||||
if (2 != this->in_tensors_.size()) {
|
||||
MS_LOG(ERROR) << "softmax entropy loss should have two inputs";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto *in0 = in_tensors_.front();
|
||||
if (in0 == nullptr) {
|
||||
MS_LOG(ERROR) << "softmax etropy loss in0 have no data";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
size_t data_size = in_tensors_.at(0)->ElementsNum();
|
||||
losses_ = new (std::nothrow) float[data_size];
|
||||
sum_data_ = new (std::nothrow) float[dims[0]];
|
||||
MS_ASSERT(losses_ != nullptr);
|
||||
MS_ASSERT(sum_data_ != nullptr);
|
||||
|
||||
sm_params_.n_dim_ = 2;
|
||||
sm_params_.element_size_ = data_size;
|
||||
sm_params_.axis_ = 1;
|
||||
for (size_t i = 0; i < dims.size(); i++) sm_params_.input_shape_[i] = dims[i];
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Factory for SoftmaxCrossEntropyWithLogitsCPUKernel.
// Constructs the kernel from the given tensors/parameter and runs Init() on it.
// Returns the initialized kernel, or nullptr when allocation or Init() fails (the partially built
// kernel is deleted in the latter case).
kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                            const std::vector<lite::Tensor *> &outputs,
                                                            OpParameter *opParameter, const lite::Context *ctx,
                                                            const kernel::KernelKey &desc,
                                                            const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy);
  auto *kernel =
    new (std::nothrow) SoftmaxCrossEntropyWithLogitsCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  // nothrow new yields nullptr on failure; check explicitly instead of relying on MS_ASSERT so the
  // Init() call below never dereferences a null kernel.
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new SoftmaxCrossEntropyWithLogitsCPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
|
||||
// REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator)
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/train/loss_kernel.h"
|
||||
#include "ir/anf.h"
|
||||
#include "nnacl/fp32_grad/softmax_grad.h"
|
||||
#include "nnacl/fp32/arithmetic.h"
|
||||
#include "nnacl/softmax_parameter.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
|
||||
public:
|
||||
explicit SoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter,
|
||||
const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs,
|
||||
const lite::Context *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LossKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
|
||||
}
|
||||
~SoftmaxCrossEntropyWithLogitsCPUKernel() override {
|
||||
delete[] losses_;
|
||||
delete[] sum_data_;
|
||||
}
|
||||
|
||||
void ForwardPostExecute(const float *labels, const float *logits,
|
||||
float *output1, float *output2) const;
|
||||
// void ForwardPostExecute(const int *labels, const float *losses, float *output) const;
|
||||
// void GradPostExecute(const int *labels, const float *losses, float* grads, float *output) const;
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
SoftmaxCrossEntropyParameter *param_;
|
||||
SoftmaxParameter sm_params_;
|
||||
float *losses_ = nullptr;
|
||||
float *sum_data_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32_grad/softmax_grad.h"
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include "nnacl/fp32_grad/softmax_grad.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
// using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
// using mindspore::schema::PrimitiveType_SoftMaxGrad;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int SoftmaxGradCPUKernel::Init() {
|
||||
// auto input_tensor =in_tensors_.at(0);
|
||||
|
||||
param = reinterpret_cast<SoftmaxParameter *>(op_parameter_);
|
||||
auto in_shape = in_tensors_.at(0)->shape();
|
||||
auto in_dims = in_shape.size();
|
||||
int ele_size = 1;
|
||||
param->n_dim_ = in_dims;
|
||||
for (size_t i = 0; i < in_dims; i++) {
|
||||
param->input_shape_[i] = in_shape[i];
|
||||
ele_size *= in_shape[i];
|
||||
}
|
||||
param->element_size_ = ele_size;
|
||||
|
||||
// malloc tmp buffer
|
||||
auto axis = param->axis_;
|
||||
if ((axis < -1) || (axis > param->n_dim_)) {
|
||||
MS_LOG(ERROR) << "SoftmaxGrad axis is invalid!";
|
||||
} else if (axis == -1) {
|
||||
axis = param->axis_ = (in_dims - 1);
|
||||
}
|
||||
|
||||
int inner_size = 1;
|
||||
for (size_t i = axis + 1; i < in_dims; i++) {
|
||||
inner_size *= in_shape[i];
|
||||
}
|
||||
|
||||
sum_data_ = new (std::nothrow) float[inner_size];
|
||||
MS_ASSERT(sum_data_ != nullptr);
|
||||
sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]];
|
||||
MS_ASSERT(sum_mul_ != nullptr);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Resize hook: performs no work and always reports RET_OK.
int SoftmaxGradCPUKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int SoftmaxGradCPUKernel::Run() {
|
||||
// auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
|
||||
auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData());
|
||||
auto yt_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
|
||||
auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
|
||||
SoftmaxGrad(input_ptr, yt_ptr, output_ptr, sum_data_, sum_mul_, reinterpret_cast<SoftmaxParameter *>(op_parameter_));
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Factory for SoftmaxGradCPUKernel: allocates the kernel, initializes it and hands it back.
// Returns nullptr if the allocation fails or Init() does not return RET_OK (the kernel is deleted
// in the latter case).
kernel::LiteKernel *CpuSoftmaxGradFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                    const std::vector<lite::Tensor *> &outputs,
                                                    OpParameter *opParameter, const lite::Context *ctx,
                                                    const kernel::KernelKey &desc,
                                                    const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  auto *grad_kernel = new (std::nothrow) SoftmaxGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (grad_kernel == nullptr) {
    MS_LOG(ERROR) << "new SoftmaxGradCPUKernel fail!";
    return nullptr;
  }

  // Happy path first: a successfully initialized kernel is returned as-is.
  if (grad_kernel->Init() == RET_OK) {
    return grad_kernel;
  }

  MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
  delete grad_kernel;
  return nullptr;
}
|
||||
|
||||
// REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftMaxGrad, CpuSoftmaxGradFp32KernelCreator)
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "nnacl/softmax_parameter.h"
|
||||
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class SoftmaxGradCPUKernel : public LiteKernel {
|
||||
public:
|
||||
explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
|
||||
const lite::PrimitiveC *primitive)
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
param = reinterpret_cast<SoftmaxParameter *>(parameter);
|
||||
}
|
||||
~SoftmaxGradCPUKernel() override = default;
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
SoftmaxParameter *param;
|
||||
float *sum_data_ = nullptr;
|
||||
float *sum_mul_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SOFTMAX_GRAD_H_
|
||||
|
|
@ -143,7 +143,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter, const lite::Context *ctx,
|
||||
const kernel::KernelKey &desc,
|
||||
|
@ -163,5 +163,5 @@ kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<li
|
|||
return kernel;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSoftmaxCrossEntropyFp32KernelCreator)
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SoftmaxCrossEntropy, CpuSparseSoftmaxCrossEntropyFp32KernelCreator)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -57,6 +57,7 @@ int Tensor::CopyTensor(const Tensor &srcTensor, bool copyData) {
|
|||
this->data_type_ = srcTensor.data_type_;
|
||||
this->shape_ = srcTensor.shape_;
|
||||
this->category_ = srcTensor.category_;
|
||||
this->format_ = srcTensor.format_;
|
||||
if (copyData) {
|
||||
auto ret = CopyTensorData(srcTensor);
|
||||
if (0 != ret) {
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#include "nnacl/conv_parameter.h"
|
||||
#include "src/ops/power_grad.h"
|
||||
#include "nnacl/power_parameter.h"
|
||||
#include "src/ops/bias_grad.h"
|
||||
#include "nnacl/arithmetic_common.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
|
@ -36,7 +38,7 @@ OpParameter *DefaultPopulateParameter(const mindspore::lite::PrimitiveC *primiti
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
OpParameter *param = new (std::nothrow) OpParameter();
|
||||
OpParameter *param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
|
||||
if (param == nullptr) {
|
||||
MS_LOG(ERROR) << "new Param for primitive failed.";
|
||||
return nullptr;
|
||||
|
@ -51,7 +53,8 @@ OpParameter *PopulateSoftmaxCrossEntropyParameter(const mindspore::lite::Primiti
|
|||
MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
|
||||
return nullptr;
|
||||
}
|
||||
SoftmaxCrossEntropyParameter *sce_param = new (std::nothrow) SoftmaxCrossEntropyParameter();
|
||||
SoftmaxCrossEntropyParameter *sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>
|
||||
(malloc(sizeof(SoftmaxCrossEntropyParameter)));
|
||||
if (sce_param == nullptr) {
|
||||
MS_LOG(ERROR) << "new SoftmaxCrossEntropyParameter failed.";
|
||||
return nullptr;
|
||||
|
@ -65,7 +68,7 @@ OpParameter *PopulatePoolingGradParameter(const mindspore::lite::PrimitiveC *pri
|
|||
MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
|
||||
return nullptr;
|
||||
}
|
||||
PoolingParameter *pooling_param = new (std::nothrow) PoolingParameter();
|
||||
PoolingParameter *pooling_param = reinterpret_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
if (pooling_param == nullptr) {
|
||||
MS_LOG(ERROR) << "new PoolingParameter failed.";
|
||||
return nullptr;
|
||||
|
@ -118,7 +121,7 @@ OpParameter *PopulateActivationGradParameter(const mindspore::lite::PrimitiveC *
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
ActivationParameter *act_param = new (std::nothrow) ActivationParameter();
|
||||
ActivationParameter *act_param = reinterpret_cast<ActivationParameter *>(malloc(sizeof(ActivationParameter)));
|
||||
if (act_param == nullptr) {
|
||||
MS_LOG(ERROR) << "new ActivationParameter failed.";
|
||||
return nullptr;
|
||||
|
@ -137,7 +140,7 @@ OpParameter *PopulateConvolutionGradFilterParameter(const mindspore::lite::Primi
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
ConvParameter *param = new (std::nothrow) ConvParameter();
|
||||
ConvParameter *param = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
if (param == nullptr) {
|
||||
MS_LOG(ERROR) << "new Param for conv grad filter failed.";
|
||||
return nullptr;
|
||||
|
@ -178,7 +181,7 @@ OpParameter *PopulateConvolutionGradInputParameter(const mindspore::lite::Primit
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
ConvParameter *param = new (std::nothrow) ConvParameter();
|
||||
ConvParameter *param = reinterpret_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
if (param == nullptr) {
|
||||
MS_LOG(ERROR) << "new Param for conv grad filter failed.";
|
||||
return nullptr;
|
||||
|
@ -219,7 +222,7 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primi
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
PowerParameter *power_param = new (std::nothrow) PowerParameter();
|
||||
PowerParameter *power_param = reinterpret_cast<PowerParameter *>(malloc(sizeof(PowerParameter)));
|
||||
if (power_param == nullptr) {
|
||||
MS_LOG(ERROR) << "new PowerParameter failed.";
|
||||
return nullptr;
|
||||
|
@ -232,10 +235,25 @@ OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primi
|
|||
return reinterpret_cast<OpParameter *>(power_param);
|
||||
}
|
||||
|
||||
// Builds the parameter block for a BiasGrad primitive.
// Allocates a zero-initialized ArithmeticParameter and records the primitive type in it.
// Returns the block viewed as OpParameter, or nullptr when `primitive` is null or the allocation
// fails. The block comes from the C allocator.
OpParameter *PopulateBiasGradParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  // calloc instead of malloc: only type_ is assigned below, so every other field would otherwise
  // be handed out with indeterminate contents.
  ArithmeticParameter *arithmetic_param =
    reinterpret_cast<ArithmeticParameter *>(calloc(1, sizeof(ArithmeticParameter)));
  if (arithmetic_param == nullptr) {
    MS_LOG(ERROR) << "new ArithmeticParameter failed.";
    return nullptr;
  }
  arithmetic_param->op_parameter_.type_ = primitive->Type();
  return reinterpret_cast<OpParameter *>(arithmetic_param);
}
|
||||
|
||||
void PopulateTrainParameters() {
|
||||
auto ppr = PopulateParameterRegistry::GetInstance();
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, DefaultPopulateParameter);
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateArithmetic);
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter);
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_SoftmaxCrossEntropy, PopulateSoftmaxCrossEntropyParameter);
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_ActivationGrad, PopulateActivationGradParameter);
|
||||
ppr->AddPopulateParameterFunc(schema::PrimitiveType_TupleGetItem, DefaultPopulateParameter);
|
||||
|
|
|
@ -35,6 +35,10 @@ void TrainSession::ReplaceOps() {
|
|||
mindspore::lite::KernelRegistrar tmp(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32,
|
||||
mindspore::schema::PrimitiveType_Conv2D,
|
||||
mindspore::kernel::CpuConvTrainFp32KernelCreator);
|
||||
|
||||
mindspore::lite::KernelRegistrar tmp0(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32,
|
||||
mindspore::schema::PrimitiveType_DepthwiseConv2D,
|
||||
mindspore::kernel::CpuConvTrainFp32KernelCreator);
|
||||
}
|
||||
|
||||
int TrainSession::CompileGraph(lite::Model *model) {
|
||||
|
@ -124,5 +128,4 @@ std::vector<tensor::MSTensor *> TrainSession::GetOutputsByName(const std::string
|
|||
}
|
||||
return ret->second;
|
||||
}
|
||||
|
||||
} // namespace mindspore::session
|
||||
|
|
|
@ -0,0 +1,584 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "common/common_test.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/file_utils_ext.h"
|
||||
#include "nnacl/fp32/reduce.h"
|
||||
#include "src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/ops/arithmetic_grad.h"
|
||||
|
||||
#ifdef PRIMITIVE_WRITEABLE
|
||||
namespace mindspore {
|
||||
|
||||
// Test helper: allocates an ArithmeticParameter for `type`, runs ArithmeticGrad::InferShape on the
// given tensors and copies the resulting dimension count and the dy / x1 / x2 shapes into the
// parameter. Returns nullptr if the allocation fails.
ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type,
                                                 std::vector<lite::Tensor *> inputs,
                                                 std::vector<lite::Tensor *> outputs) {
  auto *result = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
  if (result == nullptr) {
    MS_LOG(ERROR) << "new ArithmeticParameter failed.";
    return nullptr;
  }
  result->op_parameter_.type_ = type;

  auto *primitive = new schema::PrimitiveT;
  primitive->value.type = type;
  mindspore::lite::ArithmeticGrad grad_op(primitive);
  grad_op.InferShape(inputs, outputs);

  result->ndim_ = grad_op.NDims();

  size_t d = 0;
  while (d < grad_op.dyShape().size()) {
    result->out_shape_[d] = (grad_op.dyShape())[d];
    ++d;
  }
  d = 0;
  while (d < grad_op.x1Shape().size()) {
    result->in_shape0_[d] = (grad_op.x1Shape())[d];
    ++d;
  }
  d = 0;
  while (d < grad_op.x2Shape().size()) {
    result->in_shape1_[d] = (grad_op.x2Shape())[d];
    ++d;
  }
  return result;
}
|
||||
|
||||
// Fixture for the fp32 arithmetic-gradient kernel tests; adds nothing on top of CommonTest.
class TestArithmeticGradFp32 : public mindspore::CommonTest {
 public:
  TestArithmeticGradFp32() = default;
};
|
||||
|
||||
std::vector<lite::Tensor *> GenerateTensorsForTest(const char *test, int test_id) {
|
||||
size_t input_size;
|
||||
std::vector<int> large_dim({4, 6});
|
||||
std::vector<int> small_dim({6});
|
||||
int large_size = (4 * 6);
|
||||
int small_size = (1 * 6);
|
||||
char *dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_1_x1_4_6.bin");
|
||||
char *dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_1_x2_1_6.bin");
|
||||
|
||||
if (test_id == 7) {
|
||||
large_dim = std::vector<int>({4, 5, 6});
|
||||
small_dim = std::vector<int>({6});
|
||||
large_size = (4 * 5 * 6);
|
||||
small_size = (6);
|
||||
dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_7_x1_4_5_6.bin");
|
||||
dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_7_x2_1_1_6.bin");
|
||||
}
|
||||
if (test_id >= 8) {
|
||||
large_dim = std::vector<int>({5, 4, 6});
|
||||
small_dim = std::vector<int>({5, 1, 6});
|
||||
large_size = (4 * 5 * 6);
|
||||
small_size = (5 * 6);
|
||||
dx1_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_8_x1_5_4_6.bin");
|
||||
dx2_file = const_cast<char *>("./test_data/operators/arithmetic_fp32_8_x2_5_1_6.bin");
|
||||
}
|
||||
|
||||
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(test, &input_size));
|
||||
lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
|
||||
dy_tensor->SetData(dy_data);
|
||||
|
||||
auto x1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx1_file, &input_size));
|
||||
lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
|
||||
x1_tensor->SetData(x1_data);
|
||||
|
||||
auto x2_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx2_file, &input_size));
|
||||
lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
|
||||
x2_tensor->SetData(x2_data);
|
||||
|
||||
auto dx1_data = new float[large_size];
|
||||
lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim);
|
||||
dx1_tensor->SetData(dx1_data);
|
||||
|
||||
auto dx2_data = new float[small_size];
|
||||
lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim);
|
||||
dx2_tensor->SetData(dx2_data);
|
||||
|
||||
std::vector<lite::Tensor *> ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor};
|
||||
return ret_vector;
|
||||
}
|
||||
|
||||
// AddGrad with inputs {dy, x1, x2} and outputs {dx1, dx2} on the 4x6 / 6 data set; both gradients
// are compared against their reference files.
TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
  // GenerateTensorsForTest returns {dy, x1, x2, dx1, dx2}.
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);
  // Fail the test instead of crashing when a prerequisite is missing.
  ASSERT_TRUE(param != nullptr);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_TRUE(kernel_obj != nullptr);
  kernel_obj->Run();

  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Free each data buffer by hand and detach it before deleting the tensor.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGradFp32 passed";
}
|
||||
|
||||
// AddGrad with the operands swapped: inputs {dy, x2, x1} and outputs {dx2, dx1} on the 4x6 / 6 data
// set; both gradients are compared against their reference files.
TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
  // GenerateTensorsForTest returns {dy, x1, x2, dx1, dx2}.
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
  std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);
  // Fail the test instead of crashing when a prerequisite is missing.
  ASSERT_TRUE(param != nullptr);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_TRUE(kernel_obj != nullptr);
  kernel_obj->Run();

  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_1_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Free each data buffer by hand and detach it before deleting the tensor.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGrad2Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 AddGrad kernel on the "arithmetic_fp32_8" data set (file names suggest a
// 5x4x6 dy with a 5x1x6 second operand) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[0] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  // outputs[1] is checked against the dx2 reference, outputs[0] against the dx1 reference.
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_8_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), dx2_path));

  std::string dx1_path = "./test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx1_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGrad3Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 SubGrad kernel on the "arithmetic_fp32_2" data set and compares both
// gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_2_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_2_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestSubGradFp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 SubGrad kernel with the second and third generated tensors swapped
// (inputs {0, 2, 1}, outputs {4, 3}) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
  std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[0] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_3_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_3_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "TestSubGrad2Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 MulGrad kernel on the "arithmetic_fp32_4" data set, reports the total time
// of repeated runs, and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);

  // Time repeated kernel runs; the outputs checked below come from the last run.
  int loop_count = 1000;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    kernel_obj->Run();
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  // Cast explicitly so the argument always matches %lu, whatever integer type GetTimeUs() returns.
  printf("total cost (for %d loops): %lu us\n", loop_count, static_cast<unsigned long>(cost));

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGradFp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 MulGrad kernel with the second and third generated tensors swapped
// (inputs {0, 2, 1}, outputs {4, 3}) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
  std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[0] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_4_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad2Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 MulGrad kernel on the "arithmetic_fp32_9" data set (file names suggest a
// 5x4x6 dy with a 5x1x6 second operand) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad3Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 MulGrad kernel on the "arithmetic_fp32_9" data set with the second and third
// generated tensors swapped (inputs {0, 2, 1}, outputs {4, 3}) and compares both gradients with
// reference files.
TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
  std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[0] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_9_dx1_5_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad4Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 DivGrad kernel on the "arithmetic_fp32_5" data set and compares both
// gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string output_path = "./test_data/operators/arithmetic_fp32_5_dx1_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), output_path));

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_5_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestDivGradFp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 DivGrad kernel with the second and third generated tensors swapped
// (inputs {0, 2, 1}, outputs {4, 3}) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[2], all_tensors[1]};
  std::vector<lite::Tensor *> outputs = {all_tensors[4], all_tensors[3]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[0] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string dx2_path = "./test_data/operators/arithmetic_fp32_6_dx2_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[1]->MutableData()), dx2_path));

  std::string output_path = "./test_data/operators/arithmetic_fp32_6_dx1_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestDivGrad2Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 DivGrad kernel on the "arithmetic_fp32_10" data set (file names suggest a
// 5x4x6 dy with a 5x1x6 second operand) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string dx1_path = "./test_data/operators/arithmetic_fp32_10_dx1_5_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path));

  std::string output_path = "./test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // NOTE(review): param is intentionally not deleted here; presumably the kernel releases it - confirm.
  delete kernel_obj;
  MS_LOG(INFO) << "TestDivGrad3Fp32 passed";
}
|
||||
|
||||
// Runs the CPU fp32 DivGrad kernel on the "arithmetic_fp32_7" data set (file names suggest a
// 4x5x6 dy with a 1x1x6 second operand) and compares both gradients with reference files.
TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
  std::vector<lite::Tensor *> all_tensors =
    GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7);

  std::vector<lite::Tensor *> inputs = {all_tensors[0], all_tensors[1], all_tensors[2]};
  std::vector<lite::Tensor *> outputs = {all_tensors[3], all_tensors[4]};
  auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs);

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  // Fail the test instead of crashing when the kernel is not registered or cannot be built.
  ASSERT_TRUE(creator != nullptr);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), nullptr, desc, nullptr);
  ASSERT_NE(nullptr, kernel_obj);
  kernel_obj->Run();

  // Dump the first few values of outputs[1] to ease debugging of mismatches.
  float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
  printf("==================output data=================\n");
  for (int i = 0; i < 6; i++) {
    std::cout << output_ptr[i] << " ,";
  }
  std::cout << std::endl;

  std::string dx1_path = "./test_data/operators/arithmetic_fp32_7_dx1_4_5_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(reinterpret_cast<float *>(outputs[0]->MutableData()), dx1_path));

  std::string output_path = "./test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  // Release the data first, then detach it so that deleting the tensor does not free it again.
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->MutableData());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  // Log this test's own name (the original message was copy-pasted from TestDivGrad2Fp32).
  MS_LOG(INFO) << "Test3DDivGrad2Fp32 passed";
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
||||
#endif
|
|
@ -30,8 +30,7 @@ class TestBiasGradFp32 : public mindspore::CommonTest {
|
|||
|
||||
TEST_F(TestBiasGradFp32, BiasGradFp32) {
|
||||
// prepare stage
|
||||
auto bias_param = new ArithmeticParameter();
|
||||
|
||||
ArithmeticParameter* bias_param = static_cast<ArithmeticParameter*>(malloc(sizeof(ArithmeticParameter)));
|
||||
size_t input_size;
|
||||
std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin";
|
||||
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
|
||||
|
|
|
@ -43,7 +43,7 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<
|
|||
|
||||
TEST_F(TestBNGradFp32, BNGradFp32) {
|
||||
// prepare stage
|
||||
auto bn_param = new BNGradParameter();
|
||||
auto bn_param = static_cast<BNGradParameter*>(malloc(sizeof(BNGradParameter)));
|
||||
bn_param->epsilon_ = 0.00001;
|
||||
bn_param->momentum_ = 0.1;
|
||||
const int batch = 2;
|
||||
|
@ -88,22 +88,24 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
|
|||
std::cout << "==========dx==========\n";
|
||||
auto dx = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
for (int i = 0; i < 7; i++) std::cout << dx[i] << " ";
|
||||
std::cout << "\n";
|
||||
auto res = mindspore::lite::CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin");
|
||||
std::cout << "\n=======dscale=======\n";
|
||||
auto dscale = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
for (int i = 0; i < channels; i++) std::cout << dscale[i] << " ";
|
||||
std::cout << "\n";
|
||||
int res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin");
|
||||
res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin");
|
||||
EXPECT_EQ(res, 0);
|
||||
std::cout << "==========dbias==========\n";
|
||||
auto dbias = reinterpret_cast<float *>(outputs[2]->MutableData());
|
||||
for (int i = 0; i < 3; i++) std::cout << dbias[i] << " ";
|
||||
std::cout << "\n";
|
||||
res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin");
|
||||
res = mindspore::lite::CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin");
|
||||
EXPECT_EQ(res, 0);
|
||||
for (auto v : inputs) {
|
||||
delete[] reinterpret_cast<float *>(v->MutableData());
|
||||
v->SetData(nullptr);
|
||||
// delete v;
|
||||
delete v;
|
||||
}
|
||||
delete kernel_obj;
|
||||
MS_LOG(INFO) << "BNGradFp32 passed";
|
||||
|
|
|
@ -77,7 +77,7 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup1FP32(conv_param);
|
||||
|
||||
size_t dy_size;
|
||||
|
@ -144,7 +144,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup1FP32(conv_param);
|
||||
|
||||
size_t dy_size;
|
||||
|
@ -211,7 +211,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup3FP32(conv_param);
|
||||
|
||||
size_t dy_size;
|
||||
|
@ -277,7 +277,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup3FP32(conv_param);
|
||||
|
||||
size_t dy_size;
|
||||
|
@ -344,7 +344,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
|
||||
InitConvParamGroup3Dilation2FP32(conv_param);
|
||||
|
||||
|
@ -410,7 +410,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup3Dilation2FP32(conv_param);
|
||||
|
||||
size_t dy_size;
|
||||
|
@ -476,7 +476,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
|
|||
|
||||
TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
|
||||
// prepare stage
|
||||
auto conv_param = new ConvParameter();
|
||||
auto conv_param = static_cast<ConvParameter *>(malloc(sizeof(ConvParameter)));
|
||||
InitConvParamGroup3Dilation2FP32(conv_param);
|
||||
|
||||
size_t x_size;
|
||||
|
|
|
@ -73,7 +73,7 @@ class NetworkTest : public mindspore::CommonTest {
|
|||
// +-------------+ |
|
||||
// V dw(9) |
|
||||
// +-----------Update-----+
|
||||
|
||||
#if 0
|
||||
TEST_F(NetworkTest, tuning_layer) {
|
||||
const int BATCH_SIZE = 32;
|
||||
const int NUM_CLASSES = 10;
|
||||
|
@ -177,7 +177,7 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
node->name = "Momentum";
|
||||
meta_graph->nodes.emplace_back(std::move(node));
|
||||
}
|
||||
meta_graph->inputIndex = {6, 0}; // XXX TODO why is it reverse?
|
||||
meta_graph->inputIndex = {0, 6};
|
||||
meta_graph->outputIndex = {5, 14};
|
||||
|
||||
auto input0 = std::make_unique<schema::TensorT>();
|
||||
|
@ -209,6 +209,7 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
weight->data.resize(weight_size);
|
||||
std::copy(buf, buf + weight_size, weight->data.data());
|
||||
meta_graph->allTensors.emplace_back(std::move(weight));
|
||||
delete [] buf;
|
||||
// tensor 3 - matmul
|
||||
auto input3 = std::make_unique<schema::TensorT>();
|
||||
input3->nodeType = schema::NodeType::NodeType_Parameter;
|
||||
|
@ -231,6 +232,7 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
bias->data.resize(bias_size);
|
||||
std::copy(buf, buf + bias_size, bias->data.data());
|
||||
meta_graph->allTensors.emplace_back(std::move(bias));
|
||||
delete [] buf;
|
||||
|
||||
// tensor 5 - bias_add
|
||||
auto input5 = std::make_unique<schema::TensorT>();
|
||||
|
@ -366,13 +368,13 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
ASSERT_NE(nullptr, model);
|
||||
meta_graph.reset();
|
||||
content = nullptr;
|
||||
auto context = new lite::Context;
|
||||
context->device_type_ = lite::DT_CPU;
|
||||
context->cpu_bind_mode_ = lite::NO_BIND;
|
||||
context->thread_num_ = 1;
|
||||
lite::Context context;
|
||||
context.device_type_ = lite::DT_CPU;
|
||||
context.cpu_bind_mode_ = lite::NO_BIND;
|
||||
context.thread_num_ = 1;
|
||||
auto session = new session::TrainSession();
|
||||
ASSERT_NE(nullptr, session);
|
||||
session->Init(context);
|
||||
session->Init(&context);
|
||||
auto ret = session->CompileGraph(model);
|
||||
ASSERT_EQ(lite::RET_OK, ret);
|
||||
session->train();
|
||||
|
@ -392,7 +394,7 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
//===================================================
|
||||
ASSERT_EQ(input_size, inTensor->Size());
|
||||
memcpy(data, input_data, input_size);
|
||||
|
||||
delete [] buf;
|
||||
auto labelTensor = inputs.at(1);
|
||||
ASSERT_NE(nullptr, labelTensor);
|
||||
ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum());
|
||||
|
@ -408,7 +410,7 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
|
||||
auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
|
||||
ASSERT_NE(nullptr, outData);
|
||||
std::cout << "========================dW=====================" << std::endl;
|
||||
std::cout << "==============Initial=Scores===================" << std::endl;
|
||||
for (int i = 0; i < 20; i++) {
|
||||
std::cout << outData[i] << ", ";
|
||||
}
|
||||
|
@ -422,27 +424,19 @@ TEST_F(NetworkTest, tuning_layer) {
|
|||
ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
|
||||
outData = reinterpret_cast<float *>(outTensor->MutableData());
|
||||
ASSERT_NE(nullptr, outData);
|
||||
std::cout << "========================dW=====================" << std::endl;
|
||||
std::cout << "==============Scores=after-single=train========" << std::endl;
|
||||
for (int i = 0; i < 20; i++) {
|
||||
std::cout << outData[i] << ", ";
|
||||
}
|
||||
//===================================================
|
||||
#if 0
|
||||
size_t output_size;
|
||||
std::string output_path = "./convfp32_out_1_28_28_32.bin";
|
||||
buf = mindspore::lite::ReadFile(output_path.c_str(), &output_size);
|
||||
ASSERT_NE(nullptr, buf);
|
||||
auto output_data = reinterpret_cast<float *>(buf);
|
||||
ASSERT_NE(nullptr, output_data);
|
||||
//===================================================
|
||||
ASSERT_EQ(output_size, runOutput->Size());
|
||||
for (size_t i = 0; i < runOutput->ElementsNum(); i++) {
|
||||
ASSERT_EQ(output_data[i], outData[i]);
|
||||
}
|
||||
#endif
|
||||
MS_LOG(INFO) << "Passed";
|
||||
}
|
||||
std::string output_path = "./test_data/train/train_output_32_10.bin";
|
||||
auto error = lite::RelativeOutputError(outData, output_path);
|
||||
EXPECT_LT(error, 2e-3);
|
||||
MS_LOG(INFO) << "TuningLayer passed";
|
||||
|
||||
delete model;
|
||||
delete session;
|
||||
}
|
||||
#endif
|
||||
int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path,
|
||||
std::function<int32_t(mindspore::session::TrainSession *session, const std::string &)> cb) {
|
||||
int32_t res = 0;
|
||||
|
@ -459,7 +453,7 @@ int32_t fileIterator(mindspore::session::TrainSession *session, const std::strin
|
|||
}
|
||||
void replaceExt(const std::string &src, std::string *dst) { *dst = src.substr(0, src.find_last_of('.')) + ".emb"; }
|
||||
|
||||
int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &in, const std::string &out) {
|
||||
int32_t runEffNet(mindspore::lite::LiteSession *session, const std::string &in, const std::string &out) {
|
||||
// setup input
|
||||
auto inputs = session->GetInputs();
|
||||
// ASSERT_EQ(inputs.size(), 1);
|
||||
|
@ -473,14 +467,15 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &
|
|||
auto input_data = reinterpret_cast<float *>(in_buf);
|
||||
// ASSERT_EQ(input_size, inTensor->Size());
|
||||
std::copy(input_data, input_data + inTensor->ElementsNum(), data);
|
||||
delete [] in_buf;
|
||||
|
||||
// execute network
|
||||
session->RunGraph();
|
||||
|
||||
// compare outputs
|
||||
auto outputs = session->GetOutputMap();
|
||||
auto outputs = session->GetOutputs();
|
||||
auto output = ((outputs.begin())->second);
|
||||
float *output_data = reinterpret_cast<float *>(output.at(0)->MutableData());
|
||||
float *output_data = reinterpret_cast<float *>(output->MutableData());
|
||||
|
||||
return mindspore::lite::CompareRelativeOutput(output_data, out.c_str());
|
||||
}
|
||||
|
@ -488,15 +483,19 @@ int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &
|
|||
TEST_F(NetworkTest, efficient_net) {
|
||||
char *buf = nullptr;
|
||||
size_t net_size = 0;
|
||||
std::string net = "./test_data/nets/efficientnet_b0_f.ms";
|
||||
// std::string net = "./test_data/nets/efficientnet_b0_f.ms";
|
||||
|
||||
std::string net = "./test_data/nets/effnetb0_fwd_nofuse.ms";
|
||||
ReadFile(net.c_str(), &net_size, &buf);
|
||||
auto model = lite::Model::Import(buf, net_size);
|
||||
delete [] buf;
|
||||
auto context = new lite::Context;
|
||||
context->device_type_ = lite::DT_CPU;
|
||||
context->cpu_bind_mode_ = lite::NO_BIND;
|
||||
context->thread_num_ = 1;
|
||||
|
||||
auto session = new mindspore::session::TrainSession();
|
||||
// auto session = new mindspore::lite::LiteSession();
|
||||
ASSERT_NE(session, nullptr);
|
||||
auto ret = session->Init(context);
|
||||
ASSERT_EQ(lite::RET_OK, ret);
|
||||
|
@ -506,7 +505,7 @@ TEST_F(NetworkTest, efficient_net) {
|
|||
|
||||
#if 0
|
||||
std::string path = "/opt/share/MiniBinEmbDataset/";
|
||||
auto res = fileIterator(session, path, [](mindspore::session::TrainSession *session, const std::string &in) {
|
||||
auto res = fileIterator(session, path, [](mindspore::lite::LiteSession *session, const std::string &in) {
|
||||
int32_t res = 0;
|
||||
if (in.find(".bin") != std::string::npos) {
|
||||
std::string out;
|
||||
|
@ -549,6 +548,9 @@ TEST_F(NetworkTest, efficient_net) {
|
|||
// float* output_data = reinterpret_cast<float *>(output.at(0)->MutableData());
|
||||
// int res = lite::CompareRelativeOutput(output_data, output_path);
|
||||
ASSERT_EQ(res, 0);
|
||||
delete model;
|
||||
delete session;
|
||||
delete context;
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -54,11 +54,12 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) {
|
|||
pooling_param->pad_l_ = 1;
|
||||
pooling_param->pad_r_ = 1;
|
||||
pooling_param->thread_num_ = 1;
|
||||
pooling_param->global_ = false;
|
||||
}
|
||||
|
||||
TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
|
||||
// prepare stage
|
||||
auto pooling_param = new PoolingParameter();
|
||||
auto pooling_param = static_cast<PoolingParameter*>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pooling_param);
|
||||
pooling_param->output_channel_ = 3;
|
||||
pooling_param->pool_mode_ = PoolMode_AvgPool;
|
||||
|
@ -95,20 +96,21 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
|
|||
}
|
||||
std::cout << std::endl;
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
auto res = lite::CompareOutput(output_data, output_path);
|
||||
EXPECT_EQ(res, 0);
|
||||
|
||||
delete[] input_data;
|
||||
delete[] output_data;
|
||||
delete pooling_param;
|
||||
free(pooling_param);
|
||||
MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed";
|
||||
}
|
||||
|
||||
TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
||||
// prepare stage
|
||||
auto pooling_param = new PoolingParameter();
|
||||
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pooling_param);
|
||||
|
||||
pooling_param->output_channel_ = 3;
|
||||
pooling_param->pool_mode_ = PoolMode_AvgPool;
|
||||
|
||||
// runtime part
|
||||
printf("Calculating runtime cost...\n");
|
||||
|
@ -150,7 +152,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
|||
}
|
||||
std::cout << std::endl;
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
auto res = lite::CompareOutput(output_data, output_path);
|
||||
EXPECT_EQ(res, 0);
|
||||
|
||||
delete[] input_data;
|
||||
delete[] input1_data;
|
||||
|
@ -165,38 +168,36 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
|||
|
||||
TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
|
||||
// prepare stage
|
||||
auto pooling_param = new PoolingParameter();
|
||||
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pooling_param);
|
||||
|
||||
pooling_param->output_channel_ = 3;
|
||||
pooling_param->input_batch_ = 3;
|
||||
pooling_param->output_batch_ = 3;
|
||||
pooling_param->pool_mode_ = PoolMode_AvgPool;
|
||||
|
||||
// runtime part
|
||||
printf("Calculating runtime cost...\n");
|
||||
// uint64_t time_avg = 0;
|
||||
size_t output_data_size =
|
||||
pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->input_h_ * pooling_param->input_w_;
|
||||
|
||||
size_t input_size;
|
||||
std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin";
|
||||
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
|
||||
std::vector<int> dim_dy({1, 28, 28, 3});
|
||||
std::vector<int> dim_dy({3, 28, 28, 3});
|
||||
lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
|
||||
dy_tensor.SetData(input_data);
|
||||
|
||||
std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin";
|
||||
auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
|
||||
std::vector<int> dim_x({1, 28, 28, 3});
|
||||
std::vector<int> dim_x({3, 28, 28, 3});
|
||||
lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
x_tensor.SetData(input1_data);
|
||||
|
||||
std::vector<lite::Tensor *> inputs = {&dy_tensor, &x_tensor};
|
||||
|
||||
auto output_data = new float[output_data_size];
|
||||
std::vector<int> dim_dx({1, 28, 28, 3});
|
||||
std::vector<int> dim_dx({3, 28, 28, 3});
|
||||
lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
|
||||
dx_tensor.SetData(output_data);
|
||||
dx_tensor.MallocData();
|
||||
auto output_data = reinterpret_cast<float *>(dx_tensor.MutableData());
|
||||
std::vector<lite::Tensor *> outputs = {&dx_tensor};
|
||||
|
||||
kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
|
||||
|
@ -212,12 +213,11 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
|
|||
}
|
||||
std::cout << std::endl;
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_3_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
auto res = lite::CompareOutput(output_data, output_path);
|
||||
EXPECT_EQ(res, 0);
|
||||
|
||||
delete[] input_data;
|
||||
delete[] input1_data;
|
||||
delete[] output_data;
|
||||
dx_tensor.SetData(nullptr);
|
||||
x_tensor.SetData(nullptr);
|
||||
dy_tensor.SetData(nullptr);
|
||||
// delete pooling_param;
|
||||
|
@ -228,7 +228,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
|
|||
TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
||||
// prepare stage
|
||||
// input size will be equal to the original size of x, output size will be the output size as in forward
|
||||
auto pool = new PoolingParameter();
|
||||
auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pool);
|
||||
pool->output_channel_ = 3;
|
||||
pool->pool_mode_ = PoolMode_AvgPool;
|
||||
|
@ -240,7 +240,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
pool->stride_w_ = 2;
|
||||
|
||||
size_t input_size;
|
||||
size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_;
|
||||
|
||||
auto x_data = reinterpret_cast<float *>(
|
||||
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
|
||||
|
@ -253,11 +252,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_});
|
||||
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
|
||||
yt_tensor.SetData(yt_data);
|
||||
|
||||
auto out_data = new float[y_data_size];
|
||||
lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
out_tensor.SetData(out_data);
|
||||
|
||||
out_tensor.MallocData();
|
||||
float *out_data = static_cast<float *>(out_tensor.MutableData());
|
||||
std::vector<lite::Tensor *> inputs = {&yt_tensor, &x_tensor};
|
||||
std::vector<lite::Tensor *> outputs = {&out_tensor};
|
||||
// ----------------------------------------
|
||||
|
@ -274,7 +271,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin";
|
||||
auto res = lite::CompareRelativeOutput(out_data, output_path);
|
||||
|
||||
EXPECT_EQ(res, 0);
|
||||
|
||||
delete[] x_data;
|
||||
|
@ -283,7 +279,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
// delete conv_param;
|
||||
x_tensor.SetData(nullptr);
|
||||
yt_tensor.SetData(nullptr);
|
||||
out_tensor.SetData(nullptr);
|
||||
delete kernel;
|
||||
MS_LOG(INFO) << "AvgPoolGradStride2Fp32 Filter Grad passed";
|
||||
}
|
||||
|
@ -291,7 +286,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
|
||||
// prepare stage
|
||||
// input size will be equal to the original size of x, output size will be the output size as in forward
|
||||
auto pool = new PoolingParameter();
|
||||
auto pool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pool);
|
||||
pool->output_channel_ = 3;
|
||||
pool->pool_mode_ = PoolMode_AvgPool;
|
||||
|
@ -303,7 +298,6 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
|
|||
pool->stride_w_ = 3;
|
||||
|
||||
size_t input_size;
|
||||
size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_;
|
||||
|
||||
auto x_data = reinterpret_cast<float *>(
|
||||
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
|
||||
|
@ -317,9 +311,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
|
|||
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
|
||||
yt_tensor.SetData(yt_data);
|
||||
|
||||
auto out_data = new float[y_data_size];
|
||||
lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
out_tensor.SetData(out_data);
|
||||
out_tensor.MallocData();
|
||||
auto out_data = static_cast<float *>(out_tensor.MutableData());
|
||||
|
||||
std::vector<lite::Tensor *> inputs = {&yt_tensor, &x_tensor};
|
||||
std::vector<lite::Tensor *> outputs = {&out_tensor};
|
||||
|
@ -346,14 +340,13 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
|
|||
// delete conv_param;
|
||||
x_tensor.SetData(nullptr);
|
||||
yt_tensor.SetData(nullptr);
|
||||
out_tensor.SetData(nullptr);
|
||||
delete kernel;
|
||||
MS_LOG(INFO) << "AvgPoolGradStride3Fp32 Filter Grad passed";
|
||||
}
|
||||
|
||||
TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
|
||||
// prepare stage
|
||||
auto pooling_param = new PoolingParameter();
|
||||
auto pooling_param = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(pooling_param);
|
||||
pooling_param->output_channel_ = 3;
|
||||
pooling_param->pool_mode_ = PoolMode_MaxPool;
|
||||
|
@ -395,10 +388,11 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
|
|||
}
|
||||
std::cout << std::endl;
|
||||
std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_1_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
auto res = lite::CompareOutput(output_data, output_path);
|
||||
EXPECT_EQ(res, 0);
|
||||
|
||||
free(pooling_param);
|
||||
delete[] in_data;
|
||||
delete pooling_param;
|
||||
delete[] dy_data;
|
||||
delete[] dx_data;
|
||||
delete[] output_data;
|
||||
|
@ -526,7 +520,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) {
|
|||
TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
||||
// prepare stage
|
||||
// input size will be equal to the original size of x, output size will be the output size as in forward
|
||||
auto maxpool = new PoolingParameter();
|
||||
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(maxpool);
|
||||
maxpool->output_channel_ = 3;
|
||||
maxpool->pool_mode_ = PoolMode_MaxPool;
|
||||
|
@ -534,7 +528,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
|||
maxpool->output_batch_ = 3;
|
||||
|
||||
size_t input_size;
|
||||
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;
|
||||
|
||||
auto x_data = reinterpret_cast<float *>(
|
||||
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size));
|
||||
|
@ -553,10 +546,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
|||
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
|
||||
yt_tensor.SetData(yt_data);
|
||||
|
||||
auto out_data = new float[y_data_size];
|
||||
lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
out_tensor.SetData(out_data);
|
||||
|
||||
out_tensor.MallocData();
|
||||
auto out_data = static_cast<float *>(out_tensor.MutableData());
|
||||
std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
|
||||
std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor};
|
||||
// ----------------------------------------
|
||||
|
@ -585,7 +577,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
|||
x_tensor.SetData(nullptr);
|
||||
y_tensor.SetData(nullptr);
|
||||
yt_tensor.SetData(nullptr);
|
||||
out_tensor.SetData(nullptr);
|
||||
delete kernel;
|
||||
MS_LOG(INFO) << "MaxPoolGradBatchFp32 Filter Grad passed";
|
||||
}
|
||||
|
@ -593,7 +584,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
|||
TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
||||
// prepare stage
|
||||
// input size will be equal to the original size of x, output size will be the output size as in forward
|
||||
auto maxpool = new PoolingParameter();
|
||||
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(maxpool);
|
||||
maxpool->output_channel_ = 3;
|
||||
maxpool->input_channel_ = 3;
|
||||
|
@ -606,7 +597,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
|||
maxpool->stride_w_ = 2;
|
||||
|
||||
size_t input_size;
|
||||
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;
|
||||
|
||||
auto x_data = reinterpret_cast<float *>(
|
||||
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
|
||||
|
@ -625,9 +615,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
|||
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
|
||||
yt_tensor.SetData(yt_data);
|
||||
|
||||
auto out_data = new float[y_data_size];
|
||||
lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
out_tensor.SetData(out_data);
|
||||
out_tensor.MallocData();
|
||||
auto out_data = static_cast<float *>(out_tensor.MutableData());
|
||||
|
||||
std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
|
||||
std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor};
|
||||
|
@ -657,7 +647,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
|||
x_tensor.SetData(nullptr);
|
||||
y_tensor.SetData(nullptr);
|
||||
yt_tensor.SetData(nullptr);
|
||||
out_tensor.SetData(nullptr);
|
||||
delete kernel;
|
||||
MS_LOG(INFO) << "MaxPoolGradStride2Fp32 Filter Grad passed";
|
||||
}
|
||||
|
@ -665,7 +654,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
|||
TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
|
||||
// prepare stage
|
||||
// input size will be equal to the original size of x, output size will be the output size as in forward
|
||||
auto maxpool = new PoolingParameter();
|
||||
auto maxpool = static_cast<PoolingParameter *>(malloc(sizeof(PoolingParameter)));
|
||||
InitPoolingParamFP32(maxpool);
|
||||
maxpool->output_channel_ = 3;
|
||||
maxpool->input_channel_ = 3;
|
||||
|
@ -678,7 +667,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
|
|||
maxpool->stride_w_ = 3;
|
||||
|
||||
size_t input_size;
|
||||
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;
|
||||
|
||||
auto x_data = reinterpret_cast<float *>(
|
||||
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
|
||||
|
@ -697,9 +685,9 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
|
|||
lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
|
||||
yt_tensor.SetData(yt_data);
|
||||
|
||||
auto out_data = new float[y_data_size];
|
||||
lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
out_tensor.SetData(out_data);
|
||||
out_tensor.MallocData();
|
||||
auto out_data = static_cast<float *>(out_tensor.MutableData());
|
||||
|
||||
std::vector<lite::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
|
||||
std::vector<lite::Tensor *> maxpool_outputs = {&out_tensor};
|
||||
|
@ -729,7 +717,6 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
|
|||
x_tensor.SetData(nullptr);
|
||||
y_tensor.SetData(nullptr);
|
||||
yt_tensor.SetData(nullptr);
|
||||
out_tensor.SetData(nullptr);
|
||||
delete kernel;
|
||||
MS_LOG(INFO) << "MaxPoolGradStride3Fp32 Filter Grad passed";
|
||||
}
|
||||
|
|
|
@ -0,0 +1,696 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
// #include "mindspore/lite/src/ir/tensor.h"
|
||||
// #include "mindspore/lite/src/lite_kernel.h"
|
||||
|
||||
#include "mindspore/lite/include/context.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "common/common_test.h"
|
||||
#include "mindspore/lite/src/kernel_registry.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/file_utils_ext.h"
|
||||
|
||||
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h"
|
||||
#include "mindspore/lite/nnacl/fp32_grad/softmax_grad.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Fixture used by the TEST_F cases in this file that exercise the fp32 softmax gradient.
// It declares no members of its own; everything comes from mindspore::CommonTest.
class TestSoftmaxGradFp32 : public mindspore::CommonTest {
 public:
  TestSoftmaxGradFp32() = default;
};
|
||||
|
||||
// Populates `softmax_param` for the fixed 4-D test shape {1, 9, 11, 12}.
//
// @param softmax_param  parameter struct to fill in; must not be null.
// @param axis           value stored in axis_ (the tests in this file pass 0..3 or -1).
void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis) {
  const int kRank = 4;
  const int kShape[kRank] = {1, 9, 11, 12};

  softmax_param->axis_ = axis;
  softmax_param->n_dim_ = kRank;
  softmax_param->element_size_ = 1188;  // 1 * 9 * 11 * 12
  for (int dim = 0; dim < kRank; ++dim) {
    softmax_param->input_shape_[dim] = kShape[dim];
  }
}
|
||||
|
||||
// Populates `softmax_param` for an arbitrary 4-D shape {n, c, h, w}.
//
// @param softmax_param  parameter struct to fill in; must not be null.
// @param axis           value stored in axis_.
// @param n, c, h, w     the four extents, written to input_shape_[0..3] in that order;
//                       element_size_ is set to their product.
void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, int h, int w) {
  const int kRank = 4;
  const int extents[kRank] = {n, c, h, w};

  softmax_param->axis_ = axis;
  softmax_param->n_dim_ = kRank;

  int total_elements = 1;
  for (int dim = 0; dim < kRank; ++dim) {
    softmax_param->input_shape_[dim] = extents[dim];
    total_elements *= extents[dim];
  }
  softmax_param->element_size_ = total_elements;
}
|
||||
|
||||
#if 0  // kernel testing
// Shared driver for the (currently disabled) SoftmaxGrad kernel tests below.
//
// The five test cases differ only in the softmax axis, the prefix of their data files and the
// name they log, so the common body lives here instead of being copy-pasted five times.
//
// @param axis         softmax axis handed to InitSoftMaxParam.
// @param data_prefix  path prefix of the data files; "_yinput.bin", "_yt_input.bin" and
//                     "_out.bin" are appended to it.
// @param case_name    test name used in the final log line.
static void RunSoftmaxGradKernelCase(int axis, const std::string &data_prefix, const std::string &case_name) {
  auto softmax_param = reinterpret_cast<SoftmaxParameter *>(malloc(sizeof(SoftmaxParameter)));
  // set parameters
  InitSoftMaxParam(softmax_param, axis);

  std::vector<int> shape = {1, 9, 11, 12};
  size_t input_size;
  std::string input_path = data_prefix + "_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape);
  input_tensor.SetData(input_data);

  std::string yt_path = data_prefix + "_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape);
  yt_tensor.SetData(yt_data);

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  auto out_data = new float[softmax_param->element_size_];
  lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape);
  out_tensor.SetData(out_data);

  std::vector<lite::tensor::Tensor *> inputs = {&input_tensor, &yt_tensor};
  std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(softmax_param), nullptr, desc, nullptr);

  kernel->Init();

  // warm up loop
  for (int i = 0; i < 3; i++) {
    kernel->Run();
  }

  // timed loop
  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    kernel->Run();
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  // compare the kernel output against the reference file
  std::string output_path = data_prefix + "_out.bin";
  auto res = lite::CompareRelativeOutput(out_data, output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;
  delete[] out_data;
  // Detach the buffers released above before the tensors go out of scope
  // (presumably so the Tensor destructor does not touch them again -- TODO confirm).
  input_tensor.SetData(nullptr);
  yt_tensor.SetData(nullptr);
  out_tensor.SetData(nullptr);
  delete kernel;
  // NOTE(review): softmax_param is intentionally not released here; the original tests kept
  // "delete softmax_param" commented out, presumably because the kernel owns it -- confirm.

  MS_LOG(INFO) << case_name << " passed";
}

TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis0) {
  RunSoftmaxGradKernelCase(0, "./test_data/softmax/softmaxgrad", "SoftmaxGradKernelAxis0");
}

TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis1) {
  RunSoftmaxGradKernelCase(1, "./test_data/softmax/softmaxgrad_1", "SoftmaxGradKernelAxis1");
}

TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis2) {
  RunSoftmaxGradKernelCase(2, "./test_data/softmax/softmaxgrad_2", "SoftmaxGradKernelAxis2");
}

TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis3) {
  RunSoftmaxGradKernelCase(3, "./test_data/softmax/softmaxgrad_3", "SoftmaxGradKernelAxis3");
}

TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxisMinus1) {
  RunSoftmaxGradKernelCase(-1, "./test_data/softmax/softmaxgrad_-1", "SoftmaxGradKernelAxisMinus1");
}
#endif  // kernel testing
|
||||
|
||||
// Runs the SoftmaxGrad routine with axis 0 on the recorded inputs, prints the average duration of three
// timed runs and compares the produced gradient with the recorded reference output.
TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) {
  // The parameter block lives on the stack so that no exit path of the test can leak it.
  SoftmaxParameter param_storage = {};
  SoftmaxParameter *softmax_param = &param_storage;
  // set parameters
  InitSoftMaxParam(softmax_param, 0);

  // inner_size is the product of all dimensions that follow the softmax axis.
  int inner_size = 1;
  if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1;
  for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) {
    inner_size *= softmax_param->input_shape_[i];
  }

  // Scratch and output buffers are vectors: allocation failure throws instead of handing a null
  // pointer to SoftmaxGrad, and the memory is released automatically on every exit path.
  std::vector<float> sum_data(static_cast<size_t>(inner_size));
  std::vector<float> sum_mul(static_cast<size_t>(inner_size * softmax_param->input_shape_[softmax_param->axis_]));
  std::vector<float> out_data(static_cast<size_t>(softmax_param->element_size_));

  size_t input_size = 0;
  std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  if (input_data == nullptr || yt_data == nullptr) {
    // Report a missing or unreadable data file as a test failure instead of crashing inside SoftmaxGrad.
    delete[] input_data;
    delete[] yt_data;
    FAIL() << "failed to read softmax grad input data";
  }

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }

  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  std::string output_path = "./test_data/softmax/softmaxgrad_out.bin";
  auto res = lite::CompareRelativeOutput(out_data.data(), output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SoftmaxGradAxis0 passed";
}
|
||||
|
||||
// Runs the SoftmaxGrad routine with axis 1 on the recorded inputs, prints the average duration of three
// timed runs and compares the produced gradient with the recorded reference output.
TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) {
  // The parameter block lives on the stack so that no exit path of the test can leak it.
  SoftmaxParameter param_storage = {};
  SoftmaxParameter *softmax_param = &param_storage;
  // set parameters
  InitSoftMaxParam(softmax_param, 1);

  // inner_size is the product of all dimensions that follow the softmax axis.
  int inner_size = 1;
  if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1;
  for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) {
    inner_size *= softmax_param->input_shape_[i];
  }

  // Scratch and output buffers are vectors: allocation failure throws instead of handing a null
  // pointer to SoftmaxGrad, and the memory is released automatically on every exit path.
  std::vector<float> sum_data(static_cast<size_t>(inner_size));
  std::vector<float> sum_mul(static_cast<size_t>(inner_size * softmax_param->input_shape_[softmax_param->axis_]));
  std::vector<float> out_data(static_cast<size_t>(softmax_param->element_size_));

  size_t input_size = 0;
  std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  if (input_data == nullptr || yt_data == nullptr) {
    // Report a missing or unreadable data file as a test failure instead of crashing inside SoftmaxGrad.
    delete[] input_data;
    delete[] yt_data;
    FAIL() << "failed to read softmax grad input data";
  }

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }

  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin";
  auto res = lite::CompareRelativeOutput(out_data.data(), output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SoftmaxGradAxis1 passed";
}
|
||||
|
||||
// Runs the SoftmaxGrad routine with axis 2 on the recorded inputs, prints the average duration of three
// timed runs and compares the produced gradient with the recorded reference output.
TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) {
  // The parameter block lives on the stack so that no exit path of the test can leak it.
  SoftmaxParameter param_storage = {};
  SoftmaxParameter *softmax_param = &param_storage;
  // set parameters
  InitSoftMaxParam(softmax_param, 2);

  // inner_size is the product of all dimensions that follow the softmax axis.
  int inner_size = 1;
  if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1;
  for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) {
    inner_size *= softmax_param->input_shape_[i];
  }

  // Scratch and output buffers are vectors: allocation failure throws instead of handing a null
  // pointer to SoftmaxGrad, and the memory is released automatically on every exit path.
  std::vector<float> sum_data(static_cast<size_t>(inner_size));
  std::vector<float> sum_mul(static_cast<size_t>(inner_size * softmax_param->input_shape_[softmax_param->axis_]));
  std::vector<float> out_data(static_cast<size_t>(softmax_param->element_size_));

  size_t input_size = 0;
  std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  if (input_data == nullptr || yt_data == nullptr) {
    // Report a missing or unreadable data file as a test failure instead of crashing inside SoftmaxGrad.
    delete[] input_data;
    delete[] yt_data;
    FAIL() << "failed to read softmax grad input data";
  }

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }

  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin";
  auto res = lite::CompareRelativeOutput(out_data.data(), output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SoftmaxGradAxis2 passed";
}
|
||||
|
||||
// Runs the SoftmaxGrad routine with axis 3 on the recorded inputs, prints the average duration of three
// timed runs and compares the produced gradient with the recorded reference output.
TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) {
  // The parameter block lives on the stack so that no exit path of the test can leak it.
  SoftmaxParameter param_storage = {};
  SoftmaxParameter *softmax_param = &param_storage;
  // set parameters
  InitSoftMaxParam(softmax_param, 3);

  // inner_size is the product of all dimensions that follow the softmax axis.
  int inner_size = 1;
  if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1;
  for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) {
    inner_size *= softmax_param->input_shape_[i];
  }

  // Scratch and output buffers are vectors: allocation failure throws instead of handing a null
  // pointer to SoftmaxGrad, and the memory is released automatically on every exit path.
  std::vector<float> sum_data(static_cast<size_t>(inner_size));
  std::vector<float> sum_mul(static_cast<size_t>(inner_size * softmax_param->input_shape_[softmax_param->axis_]));
  std::vector<float> out_data(static_cast<size_t>(softmax_param->element_size_));

  size_t input_size = 0;
  std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  if (input_data == nullptr || yt_data == nullptr) {
    // Report a missing or unreadable data file as a test failure instead of crashing inside SoftmaxGrad.
    delete[] input_data;
    delete[] yt_data;
    FAIL() << "failed to read softmax grad input data";
  }

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }

  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin";
  auto res = lite::CompareRelativeOutput(out_data.data(), output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SoftmaxGradAxis3 passed";
}
|
||||
|
||||
// Runs the SoftmaxGrad routine with axis -1 on the recorded inputs, prints the average duration of three
// timed runs and compares the produced gradient with the recorded reference output.
TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) {
  // The parameter block lives on the stack so that no exit path of the test can leak it.
  SoftmaxParameter param_storage = {};
  SoftmaxParameter *softmax_param = &param_storage;
  // set parameters
  InitSoftMaxParam(softmax_param, -1);

  // An axis of -1 is rewritten to the last dimension before the buffer sizes are derived from it.
  // inner_size is the product of all dimensions that follow the softmax axis.
  int inner_size = 1;
  if (softmax_param->axis_ == -1) softmax_param->axis_ = softmax_param->n_dim_ - 1;
  for (int i = softmax_param->axis_ + 1; i < softmax_param->n_dim_; i++) {
    inner_size *= softmax_param->input_shape_[i];
  }

  // Scratch and output buffers are vectors: allocation failure throws instead of handing a null
  // pointer to SoftmaxGrad, and the memory is released automatically on every exit path.
  std::vector<float> sum_data(static_cast<size_t>(inner_size));
  std::vector<float> sum_mul(static_cast<size_t>(inner_size * softmax_param->input_shape_[softmax_param->axis_]));
  std::vector<float> out_data(static_cast<size_t>(softmax_param->element_size_));

  size_t input_size = 0;
  std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));
  if (input_data == nullptr || yt_data == nullptr) {
    // Report a missing or unreadable data file as a test failure instead of crashing inside SoftmaxGrad.
    delete[] input_data;
    delete[] yt_data;
    FAIL() << "failed to read softmax grad input data";
  }

  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

  // warm up loop
  for (int i = 0; i < 3; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }

  int loop_count = 3;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    SoftmaxGrad(input_data, yt_data, out_data.data(), sum_data.data(), sum_mul.data(), softmax_param);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin";
  auto res = lite::CompareRelativeOutput(out_data.data(), output_path);
  EXPECT_EQ(res, 0);

  delete[] input_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SoftmaxGradAxisMinus1 passed";
}
|
||||
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,3 @@
|
|||
nм*7L╨┴╥И{+╦x√╥.V7{н╣┴7ЖС╓╥сЗь8?xс6р)╟7⌠{╧7с≈╤Л'╗╣Ё:^7Ra%╤М7SШц╥≤О27ЖС╓╢щ:в╥≤О╡╥Dщ╤SШц╤[Дь╤#┼√6╬|╦╢[╦╣ гA╤TЭ≥7гD┼╥hкя6щ:W7Ra%7ЩlС╤h{ю╥┘А58"░╥RЗm╥й■n5╧+╦╥Ё;╢71бP╥мц╠╤╫zэ5/ ╤╬8TЭ╦?xS╥ЖС╓╥>{G╥й■Н7х]3╥{N7с6Н:x7╪"╣б╩53╪7С╨с6Яf▓75оa7{N╥[Дь╤╞_╩6сШ.╥б╩7ЖС╓7:З7эU7^╦д°┤╥гD┼╥Н:x7i|√╤пг=╥(П╥:З╥╕╔57ИтЫ4нш/7⌠╪c╥╬▒68И{+6эU╥
÷Н╥2:С7с7
|
||||
▄·7SШц╥Й7∙aC╣ХyЪ╥╢[87|cк╥╢<
|
||||
8╢[86Р╤TЭ≥╥`╗╢С═Ж7Ё:ч6#N╤!7h7╩л╥D╚╒╥цВ4╥yэ╥!Д⌡╥сЯ6┬;╩7Й▒78"╦(ШJ╥═х6░Шд73;и7?xс4m1P╦n°8
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -56,6 +56,36 @@ void AnfExporter::RemoveIfMakeTuple(const CNodePtr &cnode) {
|
|||
}
|
||||
}
|
||||
|
||||
void AnfExporter::RemoveIfDepend(const CNodePtr &cnode) {
|
||||
bool hasDepend = false;
|
||||
std::vector<AnfNodePtr> inputs;
|
||||
inputs.clear();
|
||||
|
||||
inputs.emplace_back(cnode->input(0));
|
||||
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
|
||||
AnfNodePtr inputNode = cnode->input(i);
|
||||
if (!inputNode->isa<CNode>()) {
|
||||
inputs.emplace_back(cnode->input(i));
|
||||
continue;
|
||||
}
|
||||
auto dependNode = utils::cast<CNodePtr>(inputNode);
|
||||
if (IsPrimitiveCNode(dependNode, schema::PrimitiveType_Depend)) {
|
||||
hasDepend = true;
|
||||
for (size_t j = 1; j < dependNode->inputs().size(); ++j) {
|
||||
AnfNodePtr dependInputNode = dependNode->input(j);
|
||||
if (dependInputNode->isa<CNode>()) {
|
||||
inputs.emplace_back(dependInputNode);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
inputs.emplace_back(cnode->input(i));
|
||||
}
|
||||
}
|
||||
if (hasDepend) {
|
||||
cnode->set_inputs(inputs);
|
||||
}
|
||||
}
|
||||
|
||||
int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph,
|
||||
const std::shared_ptr<PrimitiveC> primitive,
|
||||
const std::unique_ptr<schema::CNodeT> &dst_node) {
|
||||
|
@ -175,10 +205,12 @@ schema::MetaGraphT *AnfExporter::Export(const FuncGraphPtr &func_graph, bool kee
|
|||
return nullptr;
|
||||
}
|
||||
if (primitive_c->Type() == schema::PrimitiveType_TupleGetItem ||
|
||||
primitive_c->Type() == schema::PrimitiveType_MakeTuple) {
|
||||
primitive_c->Type() == schema::PrimitiveType_MakeTuple ||
|
||||
primitive_c->Type() == schema::PrimitiveType_Depend) {
|
||||
continue;
|
||||
}
|
||||
RemoveIfMakeTuple(cnode);
|
||||
RemoveIfDepend(cnode);
|
||||
|
||||
auto primT = primitive_c->GetPrimitiveT();
|
||||
auto node = std::make_unique<schema::CNodeT>();
|
||||
|
@ -336,9 +368,49 @@ int AnfExporter::ConvertInputValueNode(std::shared_ptr<AnfNode> input_anode,
|
|||
output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size());
|
||||
meta_graphT->allTensors.emplace_back(std::move(paramTensor));
|
||||
} else if (value->isa<mindspore::ValueSequeue>()) {
|
||||
MS_LOG(DEBUG) << "Value type is ValueSequence.";
|
||||
return RET_OK;
|
||||
} else {
|
||||
auto valueAbstract = valueNode->abstract();
|
||||
auto abstractSequnce = utils::cast<abstract::AbstractSequeuePtr>(valueAbstract);
|
||||
if (abstractSequnce->isa<abstract::AbstractTuple>()) {
|
||||
auto abstractTuple = utils::cast<abstract::AbstractTuplePtr>(valueAbstract);
|
||||
auto x_shape_data = abstractTuple->elements();
|
||||
std::vector<int32_t> shape;
|
||||
for (std::size_t i = 0; i < abstractTuple->size(); ++i) {
|
||||
auto value_track = x_shape_data[i]->GetValueTrack();
|
||||
MS_EXCEPTION_IF_NULL(value_track);
|
||||
if (value_track->isa<Int32Imm>()) {
|
||||
shape.push_back((GetValue<int>(value_track)));
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Value type is ValueSequence is not integer, it is "
|
||||
<< value_track->ToString() << ".";
|
||||
}
|
||||
}
|
||||
if (shape.size()) {
|
||||
auto typePtr = abstractTuple->elements()[0]->GetTypeTrack(); // abstractTuple->GetTypeTrack();
|
||||
paramTensor->dataType = typePtr->type_id();
|
||||
paramTensor->dims = {static_cast<int32_t>(shape.size())};
|
||||
paramTensor->nodeType = schema::NodeType_ValueNode;
|
||||
paramTensor->data.resize(shape.size() * sizeof(int));
|
||||
memcpy(paramTensor->data.data(), shape.data(), shape.size() * sizeof(int));
|
||||
node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size();
|
||||
output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size());
|
||||
meta_graphT->allTensors.emplace_back(std::move(paramTensor));
|
||||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Value type is ValueSequence not supported - " << valueAbstract->type_name() << ".";
|
||||
}
|
||||
} else if (value->isa<mindspore::BoolImm>()) {
|
||||
auto valueAbstract = valueNode->abstract();
|
||||
auto abstractScalar = utils::cast<abstract::AbstractScalarPtr>(valueAbstract);
|
||||
auto typePtr = abstractScalar->GetTypeTrack();
|
||||
paramTensor->dataType = typePtr->type_id();
|
||||
paramTensor->dims = {1};
|
||||
paramTensor->nodeType = schema::NodeType_ValueNode;
|
||||
auto data = value->cast<mindspore::BoolImmPtr>();
|
||||
paramTensor->data.emplace_back(data->value());
|
||||
node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size();
|
||||
output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size());
|
||||
meta_graphT->allTensors.emplace_back(std::move(paramTensor));
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Not support value type , need add support.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ class AnfExporter {
|
|||
int SetOpInputNode(const CNodePtr &cnode, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
|
||||
schema::CNodeT *fb_node);
|
||||
void RemoveIfMakeTuple(const CNodePtr &cnode);
|
||||
void RemoveIfDepend(const CNodePtr &cnode);
|
||||
|
||||
protected:
|
||||
int ConvertInputCNode(const std::shared_ptr<AnfNode> input_anode, schema::CNodeT *output_cnode);
|
||||
|
|
|
@ -30,6 +30,7 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {
|
|||
schema::PrimitiveType_Conv2DGradInput,
|
||||
schema::PrimitiveType_PoolingGrad,
|
||||
schema::PrimitiveType_BiasGrad,
|
||||
schema::PrimitiveType_BNGrad,
|
||||
#endif
|
||||
schema::PrimitiveType_Conv2D,
|
||||
schema::PrimitiveType_DeConv2D,
|
||||
|
@ -39,7 +40,20 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {
|
|||
schema::PrimitiveType_Resize,
|
||||
schema::PrimitiveType_BatchNorm,
|
||||
schema::PrimitiveType_FusedBatchNorm,
|
||||
schema::PrimitiveType_PReLU};
|
||||
schema::PrimitiveType_PReLU,
|
||||
schema::PrimitiveType_BiasAdd};
|
||||
|
||||
static const std::vector<schema::PrimitiveType> nhwcOpDualInputList = {
|
||||
#ifdef SUPPORT_TRAIN
|
||||
schema::PrimitiveType_Conv2DGradFilter
|
||||
#endif
|
||||
};
|
||||
|
||||
static const std::vector<schema::PrimitiveType> nhwcOpAllInputList = {
|
||||
#ifdef SUPPORT_TRAIN
|
||||
schema::PrimitiveType_PoolingGrad
|
||||
#endif
|
||||
};
|
||||
|
||||
static const std::vector<schema::PrimitiveType> fp32FullOpList = {
|
||||
schema::PrimitiveType_Concat, schema::PrimitiveType_Add,
|
||||
|
@ -73,6 +87,10 @@ std::vector<schema::PrimitiveType> Getfp32FullOpList() { return fp32FullOpList;
|
|||
|
||||
std::vector<schema::PrimitiveType> GetNhwcOpList() { return nhwcOpList; }
|
||||
|
||||
std::vector<schema::PrimitiveType> GetNhwcDualInputOpList() { return nhwcOpDualInputList; }
|
||||
|
||||
std::vector<schema::PrimitiveType> GetNhwcAllInputOpList() { return nhwcOpAllInputList; }
|
||||
|
||||
std::vector<schema::PrimitiveType> GetUint8NhwcOpList() { return int8NeedNhwcOpList; }
|
||||
|
||||
std::vector<schema::PrimitiveType> GetUint8OpList() { return int8OpList; }
|
||||
|
|
|
@ -36,6 +36,8 @@ std::vector<schema::PrimitiveType> GetNhwcOpList();
|
|||
|
||||
std::vector<schema::PrimitiveType> GetNhwcDualInputOpList();
|
||||
|
||||
std::vector<schema::PrimitiveType> GetNhwcAllInputOpList();
|
||||
|
||||
std::vector<schema::PrimitiveType> Getfp32FullOpList();
|
||||
|
||||
std::vector<schema::PrimitiveType> GetUint8NhwcOpList();
|
||||
|
|
|
@ -40,17 +40,24 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver
|
|||
// fusion const_fold
|
||||
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
||||
auto pm = std::make_shared<opt::PassManager>("anf fusion pass manager", false);
|
||||
pm->AddPass(std::make_shared<opt::ConvBiasaddFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvBatchNormFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvScaleFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu", schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU));
|
||||
pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu6", schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU6));
|
||||
pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(
|
||||
true, "conv_tuple_relu", schema::PrimitiveType_Activation, schema::ActivationType_RELU));
|
||||
pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(
|
||||
true, "conv_tuple_relu6", schema::PrimitiveType_Activation, schema::ActivationType_RELU6));
|
||||
|
||||
// for now - training does not support fusion operations
|
||||
if (config != nullptr && config->trainModel == false) {
|
||||
pm->AddPass(std::make_shared<opt::ConvBiasaddFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvBatchNormFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvScaleFusion>());
|
||||
pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu", schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU));
|
||||
pm->AddPass(std::make_shared<opt::ConvActivationFusion>(true, "conv_relu6", schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU6));
|
||||
pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(true, "conv_tuple_relu",
|
||||
schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU));
|
||||
pm->AddPass(std::make_shared<opt::ConvTupleActivationFusion>(true, "conv_tuple_relu6",
|
||||
schema::PrimitiveType_Activation,
|
||||
schema::ActivationType_RELU6));
|
||||
}
|
||||
|
||||
pm->AddPass(std::make_shared<opt::ConstFoldPass>());
|
||||
optimizer->AddPassManager(pm);
|
||||
FuncGraphPtr new_graph = optimizer->Optimize(old_graph);
|
||||
|
|
|
@ -41,6 +41,8 @@ Flags::Flags() {
|
|||
"16");
|
||||
AddFlag(&Flags::configFile, "config_file", "Configuration for post-training.", "");
|
||||
AddFlag(&Flags::formatTrans, "formatTrans", "whether transform format. true | false", "true");
|
||||
AddFlag(&Flags::trainModelIn, "trainModel", "whether the model is going to be trained on device."
|
||||
" true | false", "false");
|
||||
}
|
||||
|
||||
int Flags::Init(int argc, const char **argv) {
|
||||
|
@ -128,6 +130,15 @@ int Flags::Init(int argc, const char **argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
if (this->trainModelIn == "true") {
|
||||
this->trainModel = true;
|
||||
} else if (this->trainModelIn == "false") {
|
||||
this->trainModel = false;
|
||||
} else {
|
||||
std::cerr << "INPUT ILLEGAL: trainModel must be true|false ";
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
} // namespace converter
|
||||
|
|
|
@ -68,6 +68,8 @@ class Flags : public virtual mindspore::lite::FlagParser {
|
|||
std::string configFile;
|
||||
bool formatTrans = true;
|
||||
std::string convWeightQuantChannelThreshold;
|
||||
std::string trainModelIn;
|
||||
bool trainModel = false;
|
||||
};
|
||||
} // namespace converter
|
||||
} // namespace lite
|
||||
|
|
|
@ -146,11 +146,29 @@ STATUS FormatTransPass::DoNodeInoutFormatTrans(schema::MetaGraphT *graph) {
|
|||
MS_LOG(ERROR) << "InsertNhwc2NchwNode before " << nodeName << "failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed";
|
||||
return RET_ERROR;
|
||||
if (IsContain(GetNhwcAllInputOpList(), GetCNodeTType(**iter))) {
|
||||
int idx_num = node->inputIndex.size();
|
||||
for (int i = 0; i < idx_num; i++) {
|
||||
iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
} else if (IsContain(GetNhwcDualInputOpList(), GetCNodeTType(**iter))) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
iter = InsertFormatTransNode(graph, iter, kAfter, 0, afterNodeType, &status);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "InsertNhwc2NchwNode after " << nodeName << "failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
|
|
Loading…
Reference in New Issue