commit b601603b55 (parent 12109600fc)

    train on device
@@ -69,6 +69,7 @@ class MS_API Model {
   /// \brief Free MetaGraph in MindSpore Lite Model.
   void FreeMetaGraph();
+  ModelImpl *model_impl() { return model_impl_; }

  protected:
   ModelImpl *model_impl_ = nullptr;
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_
+#define MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_
+#include <vector>
+#include <string>
+#include <unordered_map>
+// #include "include/lite_session.h"
+#include "src/lite_session.h"
+
+namespace mindspore {
+namespace lite {
+class Model;
+}
+namespace lite::tensor {
+class Tensor;
+}
+namespace session {
+
+class TrainSession : public lite::LiteSession {
+ public:
+  TrainSession();
+  ~TrainSession() = default;
+
+  int RunGraph(const session::KernelCallBack &before = nullptr,
+               const session::KernelCallBack &after = nullptr) override;
+
+  int CompileGraph(lite::Model *model) override;
+  virtual void ReplaceOps();
+  virtual void *ExportToBuf(void *buf, size_t *len) const;
+
+  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> GetOutputs() const;
+  std::vector<tensor::MSTensor *> GetOutputsByName(const std::string &node_name) const;
+
+  virtual void train();
+  bool is_train() { return train_mode_ == true; }
+  virtual void eval();
+  bool is_eval() { return train_mode_ == false; }
+
+ protected:
+  bool train_mode_ = false;
+  lite::Model *model_ = nullptr;
+  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ext_output_map_;
+
+  // private:
+};
+}  // namespace session
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_
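For orientation, a minimal sketch of how this new session type is meant to be driven. Only Import(), CompileGraph(), train(), eval(), and RunGraph() appear in this commit; the header path, buffer loading, and error handling here are illustrative assumptions, not part of the diff.

// Hedged sketch: one training step with the TrainSession declared above.
#include "include/model.h"
#include "include/train_session.h"  // path assumed from the include guard

int RunOneTrainingStep(const char *model_buf, size_t size) {
  auto *model = mindspore::lite::Model::Import(model_buf, size);
  if (model == nullptr) {
    return -1;  // import failed
  }
  mindspore::session::TrainSession session;
  if (session.CompileGraph(model) != 0) {
    return -1;  // graph compilation failed
  }
  session.train();               // flips train_mode_, enabling the *Grad kernels
  int ret = session.RunGraph();  // one forward + backward pass
  session.eval();                // back to inference mode for validation
  return ret;
}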
@@ -13,9 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "nnacl/activation_grad.h"
-
-int ReluGrad(float *src0, float *src1, int length, float *dst) {
+#include <math.h>
+#include "nnacl/op_base.h"
+#include "nnacl/fp32/arithmetic.h"
+#include "nnacl/fp32_grad/activation_grad.h"
+#include "nnacl/errorcode.h"
+
+inline int ReluGrad(float *src0, float *src1, int length, float *dst) {
   for (int i = 0; i < length; ++i) {
     dst[i] = src1[i] > 0 ? 1.0f : 0.0f;
   }
@@ -13,11 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <string.h>
+#include <math.h>
+#include <string.h>
 #include "nnacl/fp32_grad/batch_norm.h"

-static void sumSpatialBatch(const float *in, int size, int ch, float *out) {
+void sumSpatialBatch(const float *in, int size, int ch, float *out) {
   memset(out, 0, ch * sizeof(float));
   for (int i = 0; i < size; i++) {
     const float *ptr = in + i * ch;
@@ -32,49 +32,53 @@ void scaleBias(const float *scales, int batch, int n, int size, float *output) {
     for (int c = 0; c < n; c++) output[i * n + c] *= scales[c];
 }

-void normalize(const float *x, const float *mean, const float *variance, float eps, int batch, int filters, int spatial,
+void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial,
                float *out) {
   int b, f, i;
   for (b = 0; b < batch; ++b) {
     for (i = 0; i < spatial; ++i) {
       for (f = 0; f < filters; ++f) {
         int index = b * filters * spatial + i * filters + f;
-        out[index] = (x[index] - mean[f]) / (sqrt(variance[f]) + eps);
+        out[index] = (x[index] - mean[f]) * invar[f];
       }
     }
   }
 }

-void backwardScale(const float *x_norm, const float *delta, int batch, int n, int size, float *scale_updates) {
+void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch,
+                   int n, int size, float *scale_updates) {
   int i, b, f;
   memset(scale_updates, 0, n * sizeof(float));
   for (b = 0; b < batch; ++b) {
     for (i = 0; i < size; ++i) {
       for (f = 0; f < n; ++f) {
         int index = (b * size + i) * n + f;
-        scale_updates[f] += delta[index] * x_norm[index];
+        float x_norm = (x[index] - mean[f]) * invar[f];
+        scale_updates[f] += delta[index] * x_norm;
       }
     }
   }
 }

-void meanVar(const float *in, int batch, int spatial, int ch, float *mean, float *var) {
+void meanVar(const float *in, int batch, int spatial, int ch, float eps, float *mean, float *invar) {
   float N = batch * spatial;
   sumSpatialBatch(in, N, ch, mean);
-  for (int f = 0; f < ch; ++f) mean[f] /= N;
-  memset(var, 0, ch * sizeof(float));
-  for (int i = 0; i < N; i++) {
-    for (int f = 0; f < ch; f++) {
-      float x = in[i * ch + f];
-      var[f] += (x - mean[f]) * (x - mean[f]);
-    }
+  for (int f = 0; f < ch; ++f) {
+    mean[f] /= N;
+  }
+  for (int f = 0; f < ch; f++) {
+    float tvar = 0;
+    for (int i = 0; i < N; i++) {
+      float x = in[i * ch + f];
+      tvar += (x - mean[f]) * (x - mean[f]);
+    }
+    invar[f] = 1.0f / (sqrt(tvar / N + eps));
   }
-  for (int f = 0; f < ch; f++) var[f] /= N;
 }

-void meanDelta(float *yt, int size, int ch, float eps, float *variance, float *mean_delta) {
+void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta) {
   sumSpatialBatch(yt, size, ch, mean_delta);
-  for (int i = 0; i < ch; i++) mean_delta[i] *= -1.f / sqrt((variance[i] + eps));
+  for (int i = 0; i < ch; i++) mean_delta[i] *= -invar[i];
 }

 void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
@@ -93,8 +97,8 @@ void meanAdd(const float *x, const float *mean, const float *variance_delta, int
   }
 }

-void varianceDelta(const float *x, const float *delta, const float *mean, const float *variance, int batch, int filters,
-                   int spatial, float eps, float *variance_delta) {
+void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int filters,
+                   int spatial, float *variance_delta) {
   int i, k;
   memset(variance_delta, 0, filters * sizeof(float));
   for (k = 0; k < batch * spatial; k++) {
@@ -103,16 +107,16 @@ void varianceDelta(const float *x, const float *delta, const float *mean, const
       variance_delta[i] += delta[index] * (x[index] - mean[i]);
     }
   }
-  for (i = 0; i < filters; i++) variance_delta[i] *= -.5 * pow(variance[i] + eps, (-3.f / 2.f));
+  for (i = 0; i < filters; i++) variance_delta[i] *= -.5f * invar[i] * invar[i] * invar[i];  // invar^3 == (var+eps)^(-3/2)
 }

-void NormalizeDelta(const float *x, const float *mean, const float *variance, const float *mean_delta,
-                    const float *variance_delta, int batch, int filters, int spatial, float eps, float *delta) {
+void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta,
+                    const float *variance_delta, int batch, int filters, int spatial, float *delta) {
   int f, k;
   for (k = 0; k < batch * spatial; k++) {
     for (f = 0; f < filters; f++) {
       int index = k * filters + f;
-      delta[index] = delta[index] * 1. / (sqrt(variance[f] + eps)) +
+      delta[index] = delta[index] * invar[f] +
                      variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) +
                      mean_delta[f] / (spatial * batch);
     }
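All of the signature changes in this file follow one refactor: the backward pass now receives a precomputed reciprocal standard deviation (invar) instead of the raw variance plus eps. For reference, the standard batch-norm backward identities these functions implement, rewritten with that substitution (a sketch in conventional notation; none of these symbols are defined in the diff):

s_f = (\sigma_f^2 + \epsilon)^{-1/2}, \qquad \hat{x}_i = (x_i - \mu_f)\, s_f

\frac{\partial L}{\partial \gamma_f} = \sum_i \frac{\partial L}{\partial y_i}\, \hat{x}_i, \qquad
\frac{\partial L}{\partial \mu_f} = -s_f \sum_i \frac{\partial L}{\partial \hat{x}_i}, \qquad
\frac{\partial L}{\partial \sigma_f^2} = -\frac{s_f^3}{2} \sum_i \frac{\partial L}{\partial \hat{x}_i}\,(x_i - \mu_f)

With s_f precomputed, normalize, backwardScale, meanDelta, and varianceDelta each need only mean and invar, and meanVar produces invar directly instead of the variance.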
@@ -17,28 +17,33 @@
 #ifndef MINDSPORE_LITE_NNACL_FP32_BATCH_NORM_H_
 #define MINDSPORE_LITE_NNACL_FP32_BATCH_NORM_H_

-typedef struct bnParameter {
-  int batch;
-  int channels;
-  int spatial;
-  float eps;
-} bnParameter;
+#include "nnacl/op_base.h"
+
+typedef struct BNGradParameter {
+  OpParameter op_parameter_;
+  float epsilon_;
+  float momentum_;
+} BNGradParameter;

 #ifdef __cplusplus
 extern "C" {
 #endif

+void sumSpatialBatch(const float *in, int size, int ch, float *out);
 void scaleBias(const float *scales, int batch, int n, int size, float *output);
-void normalize(const float *x, const float *mean, const float *variance, float eps, int batch, int filters, int spatial,
+void normalize(const float *x, const float *mean, const float *invar, int batch, int filters, int spatial,
               float *out);
-void backwardScale(const float *x_norm, const float *delta, int batch, int n, int size, float *scale_updates);
-void meanVar(const float *in, int batch, int size, int ch, float *mean, float *var);
-void meanDelta(float *yt, int size, int ch, float eps, float *variance, float *mean_delta);
-void varianceDelta(const float *x, const float *delta, const float *mean, const float *variance, int batch, int ch,
-                   int spatial, float eps, float *variance_delta);
+void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch,
+                   int n, int size, float *scale_updates);
+void meanVar(const float *in, int batch, int size, int ch, float eps, float *mean, float *invar);
+void meanDelta(float *yt, int size, int ch, float *invar, float *mean_delta);
+void varianceDelta(const float *x, const float *delta, const float *mean, const float *invar, int batch, int ch,
+                   int spatial, float *variance_delta);
 void meanAdd(const float *x, const float *mean, const float *variance_delta, int batch, int filters, int spatial,
              float *mean_add, float *mean_delta);
-void NormalizeDelta(const float *x, const float *mean, const float *variance, const float *mean_delta,
-                    const float *variance_delta, int batch, int filters, int spatial, float eps, float *delta);
+void NormalizeDelta(const float *x, const float *mean, const float *invar, const float *mean_delta,
+                    const float *variance_delta, int batch, int filters, int spatial, float *delta);
 #ifdef __cplusplus
 }
 #endif
@@ -125,9 +125,9 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param
 }

 void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) {
-  const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_w_;
+  const int pad_left = conv_param->pad_l_;
   // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_;
-  const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_h_;
+  const int pad_up = conv_param->pad_u_;
   // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_;

   const int stride_h = conv_param->stride_h_;
@@ -13,7 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <cstdint>
+#include <stdint.h>
+#include <float.h>
 #include "nnacl/fp32_grad/pooling_grad.h"

 void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param) {
@@ -31,33 +32,37 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
   int output_batch = pooling_param->output_batch_;

   const float *inPtr = NULL;
-  for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
+  // for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
+  for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;

   float kk = (float)(win_h * win_w);

   for (uint16_t ib = 0; ib < output_batch; ib++) {
     float *out;
-    out = &output_ptr[(ib * output_h * output_w)];
-    inPtr = (float *)(&input_ptr[(ib * in_h * in_w)]);
+    // out = &output_ptr[(ib * output_h * output_w)];
+    out = &output_ptr[(ib * in_h * in_w * channel)];
+    // inPtr = (float *)(&input_ptr[(ib * in_h * in_w)]);
+    inPtr = (float *)(&input_ptr[(ib * output_h * output_w * channel)]);
     if (1) {  // in->layout() == Tensor::nhwc)
       // iterate over yt
-      for (uint16_t yh = 0; yh < in_h; yh++) {
-        for (uint16_t yw = 0; yw < in_w; yw++) {
+      for (uint16_t yh = 0; yh < output_h; yh++) {
+        for (uint16_t yw = 0; yw < output_w; yw++) {
           for (uint16_t ic = 0; ic < channel; ic++) {
-            int idx = (yw + yh * in_w) * channel + ic;  // (ic*in_h*in_w) + (in_w*yh) + yw;
+            int idx = (yw + yh * output_w) * channel + ic;  // (ic*in_h*in_w) + (in_w*yh) + yw;
             float delta = inPtr[idx] / kk;
             for (int32_t kh = 0; kh < win_h; kh++) {
               int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= output_h)) {
+              if ((xh < 0) || (xh >= in_h)) {
                 continue;
               }
               for (int32_t kw = 0; kw < win_w; kw++) {
                 int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= output_w)) {
+                if ((xw < 0) || (xw >= in_w)) {
                   continue;
                 }
                 // out[(ic*output_h*output_w) + (xh*output_w) + xw] += delta;
-                out[(xw + output_w * xh) * channel + ic] += delta;
+                // out[(xw + output_w * xh) * channel + ic] += delta;
+                out[(xw + in_w * xh) * channel + ic] += delta;
               }
             }
           }
@@ -66,21 +71,22 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
     } else {  // nchw
       for (uint16_t ic = 0; ic < channel; ic++) {
         // iterate over yt
-        for (uint16_t yh = 0; yh < in_h; yh++) {
-          for (uint16_t yw = 0; yw < in_w; yw++) {
-            int idx = (ic * in_h * in_w) + (in_w * yh) + yw;
+        for (uint16_t yh = 0; yh < output_h; yh++) {
+          for (uint16_t yw = 0; yw < output_w; yw++) {
+            int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
             float delta = inPtr[idx] / kk;
             for (int32_t kh = 0; kh < win_h; kh++) {
               int xh = yh * stride_h + kh - pad_h;
-              if ((xh < 0) || (xh >= output_h)) {
+              if ((xh < 0) || (xh >= in_h)) {
                 continue;
               }
               for (int32_t kw = 0; kw < win_w; kw++) {
                 int xw = yw * stride_w + kw - pad_w;
-                if ((xw < 0) || (xw >= output_w)) {
+                if ((xw < 0) || (xw >= in_w)) {
                   continue;
                 }
-                out[(ic * output_h * output_w) + (xh * output_w) + xw] += delta;
+                // out[(ic * output_h * output_w) + (xh * output_w) + xw] += delta;
+                out[(ic * in_h * in_w) + (xh * in_w) + xw] += delta;
               }
             }
           }
@@ -90,7 +96,14 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter
   }
 }

-void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, PoolingParameter *pooling_param) {
+void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy_ptr, float *output_ptr,
+                    PoolingParameter *pooling_param) {
   int stride_w = pooling_param->stride_w_;
   int stride_h = pooling_param->stride_h_;
+  int pad_w = pooling_param->pad_l_;
+  int pad_h = pooling_param->pad_u_;
+  int win_w = pooling_param->window_w_;
+  int win_h = pooling_param->window_h_;
+  int channel = pooling_param->input_channel_;
   int in_w = pooling_param->input_w_;
   int in_h = pooling_param->input_h_;
@@ -98,38 +111,73 @@ void MaxPoolingGrad(const float *dy, const int *indices, float *output_ptr, Pool
   int output_h = pooling_param->output_h_;
   int output_batch = pooling_param->output_batch_;

-  int out_img_size =
-      output_h * output_w;  // Emir -- in original code this varible is calculated according to input size ??
-  int ind_img_size = in_h * in_w;
-  // const int w_pad = (output_w + pad_w + pad_w);
+  const float *inPtr;
+  const float *dyPtr;

-  for (int i = 0; i < output_h * output_w * channel * output_batch; i++) output_ptr[i] = 0.0;
+  for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0;

-  const float *yt = (const float *)(dy);
-  const int *pos = (const int *)(indices);
-  float *out = NULL;
+  for (uint16_t ib = 0; ib < output_batch; ib++) {
+    float *out;
+    out = &output_ptr[(ib * in_h * in_w * channel)];
+    inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]);
+    dyPtr = (const float *)(&dy_ptr[(ib * output_h * output_w * channel)]);

-  if (1) {  // grads->layout() == Tensor::nhwc)
-    for (int ib = 0; ib < output_batch; ib++) {
-      out = &(output_ptr[ib * output_w * output_w * channel]);
-      for (int ix = 0; ix < ind_img_size; ix++) {
-        for (int cix = 0; cix < channel; cix++) {
-          int idx = (*pos) * channel + cix;
-          out[idx] += *yt;
-          pos++;
-          yt++;
+    if (1) {  // nhwc
+      for (uint16_t yh = 0; yh < output_h; yh++) {
+        for (uint16_t yw = 0; yw < output_w; yw++) {
+          for (uint16_t ic = 0; ic < channel; ic++) {
+            int idx = (yw + yh * output_w) * channel + ic;
+
+            float delta = dyPtr[idx];
+            float max_val = -FLT_MAX;
+            int max_idx = 0;
+            for (int32_t kh = 0; kh < win_h; kh++) {
+              int xh = yh * stride_h + kh - pad_h;
+              if ((xh < 0) || (xh >= in_h)) {
+                continue;
+              }
+              for (int32_t kw = 0; kw < win_w; kw++) {
+                int xw = yw * stride_w + kw - pad_w;
+                if ((xw < 0) || (xw >= in_w)) {
+                  continue;
+                }
+
+                if (inPtr[(xw + in_w * xh) * channel + ic] > max_val) {
+                  max_val = inPtr[(xw + in_w * xh) * channel + ic];
+                  max_idx = (xw + in_w * xh) * channel + ic;
+                }
+              }
+            }
+            out[max_idx] += delta;
+          }
         }
       }
-    }
-  } else {
-    for (int ib = 0; ib < output_batch; ib++) {
-      out = &output_ptr[(ib * out_img_size)];
-      for (int cix = 0; cix < channel; cix++) {
-        for (int ix = 0; ix < ind_img_size; ix++) {
-          int idx = cix * output_h * output_w + *pos;  // cord_y*output_w + cord_x;
-          out[idx] += *yt;
-          pos++;
-          yt++;
+    } else {  // nchw
+      for (uint16_t yh = 0; yh < output_h; yh++) {
+        for (uint16_t yw = 0; yw < output_w; yw++) {
+          for (uint16_t ic = 0; ic < channel; ic++) {
+            int idx = (ic * output_h * output_w) + (output_w * yh) + yw;
+            float delta = dyPtr[idx];
+            float max_val = -FLT_MAX;
+            int max_idx = 0;
+            for (int32_t kh = 0; kh < win_h; kh++) {
+              int xh = yh * stride_h + kh - pad_h;
+              if ((xh < 0) || (xh >= in_h)) {
+                continue;
+              }
+              for (int32_t kw = 0; kw < win_w; kw++) {
+                int xw = yw * stride_w + kw - pad_w;
+                if ((xw < 0) || (xw >= in_w)) {
+                  continue;
+                }
+                if (inPtr[(ic * in_h * in_w) + (xh * in_w) + xw] > max_val) {
+                  max_val = inPtr[(ic * in_h * in_w) + (xh * in_w) + xw];
+                  max_idx = (ic * in_h * in_w) + (xh * in_w) + xw;
+                }
+              }
+            }
+            out[max_idx] += delta;
+          }
        }
      }
    }
  }
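The old kernel scattered gradients through a precomputed indices tensor; the rewrite recomputes the arg-max of each pooling window from the saved forward input and routes the incoming gradient there, so no index tensor has to be produced or stored. What it computes is the standard max-pool backward (conventional notation, not symbols from the diff):

\frac{\partial L}{\partial x_p} \;=\; \sum_{w \,:\; p \,=\, \arg\max_{q \in w} x_q} \frac{\partial L}{\partial y_w}

Ties go to the first maximal position scanned, which is what the strict > comparison in the loops above implements.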
@@ -23,7 +23,9 @@
 extern "C" {
 #endif
 void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param);
-void MaxPoolingGrad(const float *dy, const int *indices_ptr, float *output_ptr, PoolingParameter *pooling_param);
+// void MaxPoolingGrad(const float *dy, const int *indices_ptr, float *output_ptr, PoolingParameter *pooling_param);
+void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy_ptr, float *output_ptr,
+                    PoolingParameter *pooling_param);
 #ifdef __cplusplus
 }
 #endif
@@ -13,10 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <string.h>
 #include "nnacl/fp32_grad/reduce_grad.h"

-static inline bool NextIndex(const int num_dims, const int *dims, int *current) {
+static inline int NextIndex(const int num_dims, const int *dims, int *current) {
   int carry = 1;
   for (int idx = num_dims - 1; idx >= 0; --idx) {
     int current_val = current[idx] + carry;
@@ -45,10 +45,10 @@ static inline size_t GetOutputOffset(const int num_dims, const int *dims, const
   size_t offset = 0;
   for (int idx = 0; idx < num_dims; ++idx) {
     // if we need to skip this axis
-    bool is_axis = false;
+    int is_axis = 0;
     for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
       if (idx == axes[axis_idx]) {
-        is_axis = true;
+        is_axis = 1;
         break;
       }
     }
@@ -101,10 +101,10 @@ float ReduceMeanAll(const float *src, int size) {

 void ReduceSumByAxes(const float *input, const int *input_dims, float *output, const int *output_dims, int num_dims) {
   int num_outputs = 1;
-  int same_shape = true;
+  int same_shape = 1;
   for (int idx = 0; idx < num_dims; ++idx) {
     num_outputs *= output_dims[idx];
-    if (output_dims[idx] != input_dims[idx]) same_shape = false;
+    if (output_dims[idx] != input_dims[idx]) same_shape = 0;
   }
   if (same_shape) {
     memcpy(output, input, num_outputs * sizeof(float));
@@ -17,8 +17,7 @@
 #ifndef MINDSPORE_LITE_NNACL_FP32_REDUCE_GRAD_H_
 #define MINDSPORE_LITE_NNACL_FP32_REDUCE_GRAD_H_

-#include <cstddef.h>
-#include <algorithm.h>
+#include <stddef.h>

 #ifdef __cplusplus
 extern "C" {
@@ -20,7 +20,7 @@
 #include "nnacl/op_base.h"

 typedef struct SoftmaxCrossEntropyParameter {
-  OpParameter op_parameter;
+  OpParameter op_parameter_;
   int32_t batch_size_;
   unsigned int number_of_classes_;
   int n_dim_;
@@ -178,8 +178,8 @@ union PrimitiveType {
   Conv2DGradFilter,
   Conv2DGradInput,
   PoolingGrad,
-  BNGradInput,
-  OptMomentum,
+  BNGrad,
+  ApplyMomentum,
   BiasGrad,
   SoftmaxCrossEntropy,
   AddGrad,

@@ -190,6 +190,7 @@ union PrimitiveType {
   ActivationGrad,
   PriorBox,
   SpaceToBatchND,
+  Depend,
   Return,
   MakeTuple,
   ToFormat,
@@ -149,7 +149,8 @@ table Activation {
     alpha: float = 0.2;
 }
 table ActivationGrad {
-    type: ActivationGradType = 0;
+    type: ActivationType = 0;
+    alpha: float = 0.2;
 }

@@ -230,6 +231,9 @@ table SoftmaxCrossEntropy {
     axis: [int];
 }

+table make_tuple {
+}
+
 table PoolingGrad {
     format: Format = 0;
@@ -390,10 +394,11 @@ table DeConv2D {
     hasBias: bool = false;
     activationType: ActivationType = 0;
 }
-table BNGradInput {
+table BNGrad {
     eps : float;
-    channels: int;
+    momentum: float;
 }
+
 table Scale {
     axis: int;
 }

@@ -841,7 +846,10 @@ table SquaredDifference {
 table TupleGetItem {
 }

-table OptMomentum {
+table ApplyMomentum {
+    gradientScale: float;
+    useLocking: bool;
+    useNesterov: bool;
 }

@@ -884,6 +892,10 @@ table ToFormat {
     dstT: int;
 }

+table Depend {
+}
+
+table Return {
+}
@@ -27,7 +27,7 @@ set(LITE_SRC
 )

 if (SUPPORT_GPU)
-  set(LITE_SRC
+    set(LITE_SRC
         ${LITE_SRC}
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/subgraph_opencl_kernel.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/utils.cc

@@ -36,6 +36,24 @@ if (SUPPORT_GPU)
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc
         )
 endif()

+
+if (SUPPORT_TRAIN)
+    set(ANF_SRC
+        ${ANF_SRC}
+        )
+    set(PASS_SRC)
+    set(LITE_SRC
+        ${LITE_SRC}
+        ${ANF_SRC}
+        # ${CMAKE_CURRENT_SOURCE_DIR}/train/ops/train_ops.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/train/train_populate_parameter.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/train/train_session.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
+        )
+endif ()
+
 file(GLOB_RECURSE C_OPS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/ops/*.cc)
@@ -110,6 +110,7 @@ int CompareOutputData(float *output_data, float *correct_data, int data_size) {
     }
   }
   error /= data_size;
+
   if (error > 0.0001) {
     printf("has accuracy error!\n");
     printf("%f\n", error);

@@ -118,12 +119,14 @@ int CompareOutputData(float *output_data, float *correct_data, int data_size) {
   return 0;
 }

-void CompareOutput(float *output_data, std::string file_path) {
+int CompareOutput(float *output_data, std::string file_path) {
   size_t output_size;
   auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
   size_t output_num = output_size / sizeof(float);
   printf("output num : %zu\n", output_num);
-  CompareOutputData(output_data, ground_truth, output_num);
+  int res = CompareOutputData(output_data, ground_truth, output_num);
+  delete [] ground_truth;
+  return res;
 }
 }  // namespace lite
 }  // namespace mindspore
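With CompareOutput now returning the comparison status instead of void (and freeing the ground-truth buffer it reads), test code can propagate accuracy failures. A sketch of the intended call pattern, assuming the declaring utils header is included; the file name here is a placeholder, not from this commit:

int CheckOutput(float *output_data) {
  // "expected_output.bin" is a hypothetical ground-truth file for illustration.
  int ret = mindspore::lite::CompareOutput(output_data, "expected_output.bin");
  return ret;  // non-zero means the accuracy check failed (details already printed)
}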
@@ -47,7 +47,7 @@ void WriteToTxt(const std::string& file_path, void *data, size_t element_size) {
 int WriteToBin(const std::string& file_path, void *data, size_t size);

 int CompareOutputData(float *output_data, float *correct_data, int data_size);
-void CompareOutput(float *output_data, std::string file_path);
+int CompareOutput(float *output_data, std::string file_path);

 std::string GetAndroidPackageName();
 std::string GetAndroidPackagePath();
@@ -47,7 +47,9 @@ int CompareRelativeOutput(float *output_data, std::string file_path) {
   auto ground_truth = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_path.c_str(), &output_size));
   size_t output_num = output_size / sizeof(float);
   std::cout << "output num : " << output_num << "\n";
-  return CompareOutputRelativeData(output_data, ground_truth, output_num);
+  int res = CompareOutputRelativeData(output_data, ground_truth, output_num);
+  delete [] ground_truth;
+  return res;
 }
 }  // namespace lite
 }  // namespace mindspore
@@ -39,6 +39,10 @@ int Executor::Run(std::vector<tensor::Tensor *> &in_tensors, std::vector<tensor:
     }
   }
   kernel::LiteKernelUtil::InitTensorRefCount(kernels);
+  for (auto out_tensor : out_tensors) {  // increase RefCount of output tensors, such that Run will not free them
+    out_tensor->SetRefCount(out_tensor->RefCount() + 1);
+  }
+
   for (auto *kernel : kernels) {
     MS_ASSERT(nullptr != kernel);
@@ -48,6 +52,8 @@ int Executor::Run(std::vector<tensor::Tensor *> &in_tensors, std::vector<tensor:
       MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
     }
   }
+    // JBDEBUG
+    // std::cout << "executing kernel " << kernel->name() << "\n";
   auto ret = kernel->Run();
   if (0 != ret) {
     MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
@@ -27,7 +27,6 @@
 #include "src/ir/tensor.h"
 #include "include/errorcode.h"

-// using mindspore::kernel::AddressPtr;
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -112,11 +112,11 @@ int ModelImpl::BuildOps() {

 Model *Model::Import(const char *model_buf, size_t size) {
   auto model = new Model();
-  model->model_impl_ = ModelImpl::Import(model_buf, size);
   if (model_buf == nullptr) {
     MS_LOG(ERROR) << "model buf is null";
     return nullptr;
   }
+  model->model_impl_ = ModelImpl::Import(model_buf, size);
   if (model->model_impl_ == nullptr) {
     MS_LOG(ERROR) << "model impl is null";
     return nullptr;
@@ -20,11 +20,11 @@ namespace mindspore {
 namespace lite {
 #ifdef PRIMITIVE_WRITEABLE
 int ActivationGrad::GetType() const { return this->primitive_->value.AsActivationGrad()->type; }
-
+float ActivationGrad::GetAlpha() const { return this->primitive_->value.AsActivationGrad()->alpha; }
 void ActivationGrad::SetType(int type) {
-  this->primitive_->value.AsActivationGrad()->type = (schema::ActivationGradType)type;
+  this->primitive_->value.AsActivationGrad()->type = (schema::ActivationType)type;
 }
-
+void ActivationGrad::SetAlpha(float alpha) { this->primitive_->value.AsActivationGrad()->alpha = alpha; }
 #else
 int ActivationGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
   MS_ASSERT(nullptr != primitive);
@@ -40,7 +40,7 @@ int ActivationGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flat
   return RET_OK;
 }
 int ActivationGrad::GetType() const { return this->primitive_->value_as_ActivationGrad()->type(); }
-
+float ActivationGrad::GetAlpha() const { return this->primitive_->value_as_ActivationGrad()->alpha(); }
 #endif
 }  // namespace lite
 }  // namespace mindspore
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_ACTIVATION_GRAD_H_
-#define LITE_MINDSPORE_LITE_C_OPS_ACTIVATION_GRAD_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_ACTIVATION_GRAD_H_
+#define MINDSPORE_LITE_SRC_OPS_ACTIVATION_GRAD_H_

 #include <vector>
 #include <set>

@@ -32,13 +32,15 @@ class ActivationGrad : public PrimitiveC {
   ActivationGrad() = default;
   explicit ActivationGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
   void SetType(int type);
+  void SetAlpha(float alpha);
 #else
   ActivationGrad() = default;

   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
   int GetType() const;
+  float GetAlpha() const;
 };
 }  // namespace lite
 }  // namespace mindspore
-#endif  // LITE_MINDSPORE_LITE_C_OPS_ACTIVATION_GRAD_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_ACTIVATION_GRAD_H_
@@ -0,0 +1,64 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/ops/apply_momentum.h"
+namespace mindspore {
+namespace lite {
+
+#ifdef PRIMITIVE_WRITEABLE
+
+#else
+int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
+  MS_ASSERT(nullptr != primitive);
+  MS_ASSERT(nullptr != fbb);
+  auto attr = primitive->value_as_ApplyMomentum();
+  if (attr == nullptr) {
+    MS_LOG(ERROR) << "value_as_ApplyMomentum return nullptr";
+    return RET_ERROR;
+  }
+  auto val_offset = schema::CreateApplyMomentum(*fbb);
+  // must be registered as ApplyMomentum, not ActivationGrad
+  auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ApplyMomentum, val_offset.o);
+  fbb->Finish(prim_offset);
+  return RET_OK;
+}
+#endif
+
+int ApplyMomentum::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
+  if (5 != inputs.size()) {
+    MS_LOG(ERROR) << "ApplyMomentum should have 5 input tensors";
+    return RET_ERROR;
+  }
+  // if (outputs.empty()) {
+  //   MS_LOG(ERROR) << "ApplyMomentumCPUKernel error input output size!";
+  //   return RET_ERROR;
+  // }
+
+  if (inputs[0]->ElementsNum() != inputs[1]->ElementsNum() || inputs[0]->ElementsNum() != inputs[3]->ElementsNum() ||
+      inputs[2]->ElementsNum() != 1 || inputs[4]->ElementsNum() != 1) {
+    MS_LOG(ERROR) << "error input data size!";
+    return RET_ERROR;
+  }
+  if (!outputs.empty()) {
+    auto *out = outputs.front();
+    MS_ASSERT(out != nullptr);
+    out->set_data_type(inputs[0]->data_type());
+    out->SetFormat(inputs[0]->GetFormat());
+  }
+
+  return RET_OK;
+}
+}  // namespace lite
+}  // namespace mindspore
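The element-count checks above (inputs 0, 1, and 3 equal-sized; inputs 2 and 4 scalar) match the conventional ApplyMomentum operand order of weight, accumulator, learning rate, gradient, momentum; the diff never names the slots, so treat that mapping as an assumption. The update such a kernel performs is the standard momentum SGD step:

a \leftarrow m \cdot a + g, \qquad w \leftarrow w - \eta \cdot a

with w the weight, a the accumulator, g the gradient, m the momentum, and \eta the learning rate (the useNesterov flag in the schema selects the variant w \leftarrow w - \eta\,(g + m \cdot a)).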
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_OPS_APPLY_MOMENTUM_H_
+#define MINDSPORE_LITE_SRC_OPS_APPLY_MOMENTUM_H_
+
+#include <vector>
+#include <set>
+#include <cmath>
+#include "ir/dtype/type_id.h"
+#include "src/ops/primitive_c.h"
+
+namespace mindspore {
+namespace lite {
+class ApplyMomentum : public PrimitiveC {
+ public:
+#ifdef PRIMITIVE_WRITEABLE
+  MS_DECLARE_PARENT(ApplyMomentum, PrimitiveC);
+  ApplyMomentum() = default;
+  explicit ApplyMomentum(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
+#else
+  ApplyMomentum() = default;
+
+  int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
+#endif
+  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
+};
+}  // namespace lite
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_OPS_APPLY_MOMENTUM_H_
@@ -0,0 +1,108 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/ops/arithmetic_grad.h"
+#include "include/errorcode.h"
+#include "utils/log_adapter.h"
+#include "src/ir/tensor.h"
+
+namespace mindspore {
+namespace lite {
+int ArithmeticGrad::InferShape(std::vector<lite::tensor::Tensor *> inputs_,
+                               std::vector<lite::tensor::Tensor *> outputs_) {
+  if (inputs_.size() != 3) {
+    MS_LOG(ERROR) << "The number of inputs must be 3";
+    return RET_ERROR;
+  }
+  if (outputs_.size() != 2) {
+    MS_LOG(ERROR) << "The number of outputs must be 2";
+    return RET_ERROR;
+  }
+  auto dy = inputs_[0];
+  auto x1 = inputs_[1];
+  auto x2 = inputs_[2];
+  auto dx1 = outputs_[0];
+  auto dx2 = outputs_[1];
+
+  MS_ASSERT(dy != nullptr);
+  MS_ASSERT(x1 != nullptr);
+  MS_ASSERT(x2 != nullptr);
+  MS_ASSERT(dx1 != nullptr);
+  MS_ASSERT(dx2 != nullptr);
+
+  auto inShape0 = x1->shape();
+  auto inShape1 = x2->shape();
+  auto outShape = dy->shape();
+
+  if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) {
+    ndim_ = outShape.size();
+    auto fillDimNum0 = outShape.size() - inShape0.size();
+    auto fillDimNum1 = outShape.size() - inShape1.size();
+    int j0 = 0;
+    int j1 = 0;
+    for (unsigned int i = 0; i < outShape.size(); i++) {
+      x1_shape_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++];
+      x2_shape_[i] = (i < fillDimNum1) ? 1 : inShape1[j1++];
+      dy_shape_[i] = outShape[i];
+    }
+  } else {
+    // if (inShape0.size() < inShape1.size())
+    if (dx1->ElementsNum() < dx2->ElementsNum()) {
+      ndim_ = inShape1.size();
+      auto fillDimNum = inShape1.size() - inShape0.size();  // This will not work for batch!
+      int j = 0;
+      for (unsigned int i = 0; i < inShape1.size(); i++) {
+        if (i < fillDimNum) {
+          x2_shape_[i] = 1;
+        } else {
+          x2_shape_[i] = inShape0[j++];
+        }
+        x1_shape_[i] = inShape1[i];
+        dy_shape_[i] = outShape[i];
+      }
+    } else if (dx2->ElementsNum() < dx1->ElementsNum()) {  // if (inShape0.size() > inShape1.size())
+      ndim_ = inShape0.size();
+      broadcasting_ = true;
+      int j = 0;
+      auto fillDimNum = inShape0.size() - inShape1.size();
+      for (unsigned int i = 0; i < inShape0.size(); i++) {
+        if (i < fillDimNum) {
+          x2_shape_[i] = 1;
+        } else {
+          x2_shape_[i] = inShape1[j++];
+        }
+        x1_shape_[i] = inShape0[i];
+        dy_shape_[i] = outShape[i];
+      }
+    } else {
+      broadcasting_ = false;
+      for (unsigned int i = 0; i < inShape0.size(); i++) {
+        x2_shape_[i] = inShape1[i];
+        x1_shape_[i] = inShape0[i];
+        dy_shape_[i] = outShape[i];
+      }
+    }
+  }
+
+  dx1->set_shape(x1->shape());
+  dx2->set_shape(x2->shape());
+  dx1->set_data_type(dy->data_type());
+  dx2->set_data_type(dy->data_type());
+  return RET_OK;
+}
+}  // namespace lite
+}  // namespace mindspore
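What this InferShape is setting up: when one operand was broadcast in the forward add/sub/mul/div, its gradient must be summed back over the broadcast axes. Padding x1_shape_/x2_shape_ with leading 1s against dy_shape_ tells the backward kernel exactly which axes those are. Schematically, for z = x op y (standard broadcasting rule, not notation from this file):

\nabla_x L \;=\; \operatorname{reduce\_sum}\Bigl(\nabla_z L,\ \{\, i : \text{shape}(x)_i = 1 \ne \text{shape}(dy)_i \,\}\Bigr)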
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_OPS_ARITHMETIC_GRAD_H_
+#define MINDSPORE_LITE_SRC_OPS_ARITHMETIC_GRAD_H_
+
+#include <vector>
+#include <set>
+#include <cmath>
+#include "ir/dtype/type_id.h"
+#include "src/ops/primitive_c.h"
+
+namespace mindspore {
+namespace lite {
+class ArithmeticGrad : public PrimitiveC {
+ public:
+#ifdef PRIMITIVE_WRITEABLE
+  MS_DECLARE_PARENT(ArithmeticGrad, PrimitiveC);
+  ArithmeticGrad() = default;
+  explicit ArithmeticGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
+#else
+  // explicit Arithmetic(schema::Primitive *primitive) : PrimitiveC(primitive) {}
+  ArithmeticGrad() = default;
+  int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override {
+    return RET_ERROR;
+  }
+#endif
+  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
+  bool Broadcasting() { return this->broadcasting_; }
+  int NDims() { return this->ndim_; }
+  std::vector<int> dyShape() { return this->dy_shape_; }
+  std::vector<int> x1Shape() { return this->x1_shape_; }
+  std::vector<int> x2Shape() { return this->x2_shape_; }
+
+ protected:
+  bool broadcasting_ = false;
+  int ndim_;
+  std::vector<int> dy_shape_;
+  std::vector<int> x1_shape_;
+  std::vector<int> x2_shape_;
+};
+}  // namespace lite
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_OPS_ARITHMETIC_GRAD_H_
@@ -48,6 +48,32 @@ std::vector<int> BiasGrad::GetAxis() const {
   return std::vector<int>(fb_vector->begin(), fb_vector->end());
 }

+int BiasGrad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
+  if (1 != inputs.size()) {
+    MS_LOG(ERROR) << "BiasGrad should have one input";
+    return RET_ERROR;
+  }
+  if (1 != outputs.size()) {
+    MS_LOG(ERROR) << "BiasGrad should have one output";
+    return RET_ERROR;
+  }
+  auto *in0 = inputs.front();
+  auto *out = outputs.front();
+  MS_ASSERT(in0 != nullptr);
+  MS_ASSERT(out != nullptr);
+  auto inshape = in0->shape();
+  int ndim = inshape.size();
+  for (int i = 0; i < ndim - 1; i++) {
+    inshape[i] = 1;
+  }
+  out->set_shape(inshape);
+  out->set_data_type(in0->data_type());
+  out->SetFormat(in0->GetFormat());
+
+  return RET_OK;
+}
+
 #endif
 }  // namespace lite
 }  // namespace mindspore
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_BIAS_GRAD_H_
-#define LITE_MINDSPORE_LITE_C_OPS_BIAS_GRAD_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_BIAS_GRAD_H_
+#define MINDSPORE_LITE_SRC_OPS_BIAS_GRAD_H_

 #include <vector>
 #include <set>

@@ -38,10 +38,11 @@ class BiasGrad : public PrimitiveC {
   BiasGrad() = default;

   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
+  int InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) override;
 #endif
   std::vector<int> GetAxis() const;
 };
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_BIAS_GRAD_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_BIAS_GRAD_H_
@@ -14,33 +14,33 @@
  * limitations under the License.
  */

-#include "src/ops/bn_grad_input.h"
+#include "src/ops/bn_grad.h"

 namespace mindspore {
 namespace lite {
 #ifdef PRIMITIVE_WRITEABLE
-float BNGradInput::GetEps() const { return this->primitive_->value.AsBNGradInput()->eps; }
-int BNGradInput::GetChannels() const { return this->primitive_->value.AsBNGradInput()->channels; }
+float BNGrad::GetEps() const { return this->primitive_->value.AsBNGrad()->eps; }
+float BNGrad::GetMomentum() const { return this->primitive_->value.AsBNGrad()->momentum; }

-void BNGradInput::SetEps(float eps) { this->primitive_->value.AsBNGradInput()->eps = eps; }
-void BNGradInput::SetChannels(int channels) { this->primitive_->value.AsBNGradInput()->channels = channels; }
+void BNGrad::SetEps(float eps) { this->primitive_->value.AsBNGrad()->eps = eps; }
+void BNGrad::SetMomentum(float momentum) { this->primitive_->value.AsBNGrad()->momentum = momentum; }

 #else
-int BNGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
+int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
   MS_ASSERT(nullptr != primitive);
   MS_ASSERT(nullptr != fbb);
-  auto attr = primitive->value_as_BNGradInput();
+  auto attr = primitive->value_as_BNGrad();
   if (attr == nullptr) {
     MS_LOG(ERROR) << "value_as_BNGradInput return nullptr";
     return RET_ERROR;
   }
-  auto val_offset = schema::CreateBNGradInput(*fbb, attr->eps(), attr->channels());
-  auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BNGradInput, val_offset.o);
+  auto val_offset = schema::CreateBNGrad(*fbb, attr->eps(), attr->momentum());
+  auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BNGrad, val_offset.o);
   fbb->Finish(prim_offset);
   return RET_OK;
 }
-float BNGradInput::GetEps() const { return this->primitive_->value_as_BNGradInput()->eps(); }
-int BNGradInput::GetChannels() const { return this->primitive_->value_as_BNGradInput()->channels(); }
+float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); }
+float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); }

 #endif
 }  // namespace lite
@@ -25,21 +25,20 @@

 namespace mindspore {
 namespace lite {
-class BNGradInput : public PrimitiveC {
+class BNGrad : public PrimitiveC {
  public:
 #ifdef PRIMITIVE_WRITEABLE
-  MS_DECLARE_PARENT(BNGradInput, PrimitiveC);
-  BNGradInput() = default;
-  explicit BNGradInput(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
+  MS_DECLARE_PARENT(BNGrad, PrimitiveC);
+  BNGrad() = default;
+  explicit BNGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
   void SetEps(float eps);
-  void SetChannels(int channels);
+  void SetMomentum(float momentum);
 #else
-  BNGradInput() = default;
-
+  BNGrad() = default;
   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
   float GetEps() const;
-  int GetChannels() const;
+  float GetMomentum() const;
 };
 }  // namespace lite
 }  // namespace mindspore
@@ -105,5 +105,47 @@ int Conv2DGradFilter::GetActivationType() const {
 }

 #endif
+
+int Conv2DGradFilter::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
+  if (3 != inputs.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad Filter should have 3 inputs";
+    return RET_ERROR;
+  }
+  if (1 != outputs.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad Filter should have one output";
+    return RET_ERROR;
+  }
+
+  auto *in0 = inputs.at(0);
+  auto *in = inputs.at(2);
+  MS_ASSERT(in0 != nullptr);
+  MS_ASSERT(in != nullptr);
+
+  std::vector<int> output_shape;
+  int *out_shape = reinterpret_cast<int *>(in->Data());
+  int new_size = in->ElementsNum();
+  if (in0->GetFormat() == in->GetFormat()) {
+    for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]);
+  } else {
+    if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) {
+      output_shape.push_back(out_shape[0]);
+      output_shape.push_back(out_shape[2]);
+      output_shape.push_back(out_shape[3]);
+      output_shape.push_back(out_shape[1]);
+    } else {
+      MS_LOG(ERROR) << "Shape conversion is not supported";
+      return RET_ERROR;
+    }
+  }
+
+  auto *out = outputs.at(0);
+  MS_ASSERT(out != nullptr);
+
+  out->set_shape(output_shape);
+  out->set_data_type(in0->data_type());
+  out->SetFormat(in0->GetFormat());
+
+  return RET_OK;
+}
+
 }  // namespace lite
 }  // namespace mindspore
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_FILTER_H_
-#define LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_FILTER_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_FILTER_H_
+#define MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_FILTER_H_

 #include <vector>
 #include <set>

@@ -53,6 +53,7 @@ class Conv2DGradFilter : public PrimitiveC {

   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
+  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
   int GetFormat() const;
   int GetGroup() const;
   int GetChannelIn() const;

@@ -74,4 +75,4 @@ class Conv2DGradFilter : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_FILTER_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_FILTER_H_
@@ -103,5 +103,46 @@ int Conv2DGradInput::GetActivationType() const {
 }

 #endif
+
+int Conv2DGradInput::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
+  if (3 != inputs.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs";
+    return RET_ERROR;
+  }
+  if (1 != outputs.size()) {
+    MS_LOG(ERROR) << "Conv2d Grad Input should have one output";
+    return RET_ERROR;
+  }
+
+  auto *in0 = inputs.at(0);
+  auto *in = inputs.at(2);
+  MS_ASSERT(in0 != nullptr);
+  MS_ASSERT(in != nullptr);
+
+  std::vector<int> output_shape;
+  int *out_shape = reinterpret_cast<int *>(in->Data());
+  int new_size = in->ElementsNum();
+  if (in0->GetFormat() == in->GetFormat()) {
+    for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]);
+  } else {
+    if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) {
+      output_shape.push_back(out_shape[0]);
+      output_shape.push_back(out_shape[2]);
+      output_shape.push_back(out_shape[3]);
+      output_shape.push_back(out_shape[1]);
+    } else {
+      MS_LOG(ERROR) << "Shape conversion is not supported";
+      return RET_ERROR;
+    }
+  }
+
+  auto *out = outputs.at(0);
+  MS_ASSERT(out != nullptr);
+  out->set_shape(output_shape);
+  out->set_data_type(in0->data_type());
+  out->SetFormat(in0->GetFormat());
+
+  return RET_OK;
+}
+
 }  // namespace lite
 }  // namespace mindspore
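Both Conv2DGradFilter::InferShape and Conv2DGradInput::InferShape read the target shape out of the third input tensor's raw data rather than inferring it, permuting it when the shape was recorded in NCHW but the graph runs NHWC. The push_back sequence above is the standard NCHW-to-NHWC index map, isolated here as a sketch (the helper name is made up, not from this commit):

// Hypothetical helper showing the axis permutation used by the two InferShape
// bodies above: NCHW (N, C, H, W) -> NHWC (N, H, W, C), i.e. indices {0, 2, 3, 1}.
#include <vector>

std::vector<int> NchwToNhwc(const int *shape_nchw) {
  return {shape_nchw[0], shape_nchw[2], shape_nchw[3], shape_nchw[1]};
}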
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_INPUT_H_
-#define LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_INPUT_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_INPUT_H_
+#define MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_INPUT_H_

 #include <vector>
 #include <set>

@@ -53,6 +53,7 @@ class Conv2DGradInput : public PrimitiveC {

   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
+  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
   int GetFormat() const;
   int GetGroup() const;
   int GetChannelIn() const;

@@ -74,4 +75,4 @@ class Conv2DGradInput : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_CONV2_D_GRAD_INPUT_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_CONV2D_GRAD_INPUT_H_
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_DE_DEPTHWISE_CONV2_D_H_
-#define LITE_MINDSPORE_LITE_C_OPS_DE_DEPTHWISE_CONV2_D_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_DEDEPTHWISE_CONV2D_H_
+#define MINDSPORE_LITE_SRC_OPS_DEDEPTHWISE_CONV2D_H_

 #include <vector>
 #include <set>

@@ -84,4 +84,4 @@ class DeDepthwiseConv2D : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_DE_DEPTHWISE_CONV2_D_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_DEDEPTHWISE_CONV2D_H_
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_C_OPS_DEPTHWISE_CONV2_D_H_
-#define LITE_MINDSPORE_LITE_C_OPS_DEPTHWISE_CONV2_D_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_DEPTHWISE_CONV2D_H_
+#define MINDSPORE_LITE_SRC_OPS_DEPTHWISE_CONV2D_H_

 #include <vector>
 #include <set>

@@ -94,4 +94,4 @@ class DepthwiseConv2D : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_DEPTHWISE_CONV2_D_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_DEPTHWISE_CONV2D_H_
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef LITE_MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
-#define LITE_MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
+#define MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
 #include <vector>
 #include "src/ops/primitive_c.h"

@@ -37,4 +37,4 @@ class MakeTuple : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_MAKE_TUPLE_H_
@@ -86,5 +86,52 @@ int PoolingGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuf
  return RET_OK;
}
#endif

int PoolingGrad::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
  MS_ASSERT(this->primitive != nullptr);
  auto input = inputs_.at(0);
  MS_ASSERT(input != nullptr);
  int input_h = input->shape().at(1);
  int input_w = input->shape().at(2);

  auto window_h = GetWindowH();
  auto window_w = GetWindowW();
  if (GetGlobal()) {
    window_h = input_h;
    window_w = input_w;
  }

  pad_l_ = GetPadLeft();
  pad_u_ = GetPadUp();
  pad_d_ = GetPadDown();
  pad_r_ = GetPadRight();
  if (GetPadMode() == schema::PadMode_SAME) {
    int output_w = std::ceil(static_cast<float>(input_w) / static_cast<float>(GetStrideW()));
    int output_h = std::ceil(static_cast<float>(input_h) / static_cast<float>(GetStrideH()));
    auto pad_h_all = ((output_h - 1) * GetStrideH() + (window_h - 1) + 1 - input_h);
    auto pad_w_all = ((output_w - 1) * GetStrideW() + (window_w - 1) + 1 - input_w);
    if (pad_h_all < 0) {
      pad_u_ = pad_d_ = 0;
    } else {
      pad_u_ = pad_h_all / 2;
      pad_d_ = pad_h_all - pad_u_;
    }
    if (pad_w_all < 0) {
      pad_l_ = pad_r_ = 0;
    } else {
      pad_l_ = pad_w_all / 2;
      pad_r_ = pad_w_all - pad_l_;
    }
  }
  auto grad_output = outputs_.at(0);
  // todo: fmk type
  auto output_shape = input->shape();
  grad_output->set_shape(output_shape);
  grad_output->set_data_type(input->data_type());
  // todo: temp fix
  grad_output->SetFormat(input->GetFormat());
  return RET_OK;
}

} // namespace lite
} // namespace mindspore

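The SAME-pad arithmetic in PoolingGrad::InferShape above is easiest to check with a standalone sketch. same_pads() below is a hypothetical helper written for illustration, not part of the commit.

#include <cmath>
#include <cstdio>

// Sketch of the SAME-padding split used above, for one spatial dimension.
static void same_pads(int input, int stride, int window, int *before, int *after) {
  int output = static_cast<int>(std::ceil(static_cast<float>(input) / stride));
  int pad_all = (output - 1) * stride + window - input;
  if (pad_all < 0) {
    *before = *after = 0;
  } else {
    *before = pad_all / 2;       // smaller half first, as in pad_u_/pad_l_
    *after = pad_all - *before;  // remainder goes to pad_d_/pad_r_
  }
}

int main() {
  int pad_u = 0, pad_d = 0;
  same_pads(/*input=*/7, /*stride=*/2, /*window=*/3, &pad_u, &pad_d);
  std::printf("pad_u=%d pad_d=%d\n", pad_u, pad_d);  // prints pad_u=1 pad_d=1
  return 0;
}
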
@@ -14,8 +14,8 @@
 * limitations under the License.
 */

#ifndef LITE_MINDSPORE_LITE_C_OPS_POOLING_GRAD_H_
#define LITE_MINDSPORE_LITE_C_OPS_POOLING_GRAD_H_
#ifndef MINDSPORE_LITE_SRC_OPS_POOLING_GRAD_H_
#define MINDSPORE_LITE_SRC_OPS_POOLING_GRAD_H_

#include <vector>
#include <set>

@@ -49,6 +49,7 @@ class PoolingGrad : public PrimitiveC {

  int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;
  int GetFormat() const;
  int GetPoolingMode() const;
  bool GetGlobal() const;

@@ -62,8 +63,14 @@ class PoolingGrad : public PrimitiveC {
  int GetPadLeft() const;
  int GetPadRight() const;
  int GetRoundMode() const;

 protected:
  int pad_u_ = 0;
  int pad_d_ = 0;
  int pad_l_ = 0;
  int pad_r_ = 0;
};
} // namespace lite
} // namespace mindspore

#endif  // LITE_MINDSPORE_LITE_C_OPS_POOLING_GRAD_H_
#endif  // MINDSPORE_LITE_SRC_OPS_POOLING_GRAD_H_

@@ -14,8 +14,8 @@
 * limitations under the License.
 */

#ifndef LITE_MINDSPORE_LITE_C_OPS_POWER_GRAD_H_
#define LITE_MINDSPORE_LITE_C_OPS_POWER_GRAD_H_
#ifndef MINDSPORE_LITE_SRC_OPS_POWER_GRAD_H_
#define MINDSPORE_LITE_SRC_OPS_POWER_GRAD_H_

#include <vector>
#include <set>

@@ -46,4 +46,4 @@ class PowerGrad : public PrimitiveC {
} // namespace lite
} // namespace mindspore

#endif  // LITE_MINDSPORE_LITE_C_OPS_POWER_GRAD_H_
#endif  // MINDSPORE_LITE_SRC_OPS_POWER_GRAD_H_

@@ -125,6 +125,21 @@
#ifdef PRIMITIVE_WRITEABLE
#include "tools/converter/quantizer/quantize_util.h"
#endif

#ifdef SUPPORT_TRAIN
#include "src/ops/activation_grad.h"
#include "src/ops/apply_momentum.h"
#include "src/ops/bias_grad.h"
#include "src/ops/pooling_grad.h"
#include "src/ops/conv2d_grad_filter.h"
#include "src/ops/conv2d_grad_input.h"
#include "src/ops/power_grad.h"
#include "src/ops/softmax_cross_entropy.h"
#include "src/ops/bn_grad.h"
#include "src/ops/arithmetic_grad.h"
#endif


namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE

@@ -353,6 +368,22 @@ std::shared_ptr<PrimitiveC> PrimitiveC::UnPackFromPrimitive(const Primitive &pri
    return NewPrimitiveC<TupleGetItem>(prim, inputs, quantType);
  } else if (op_type == "Softmax") {
    return NewPrimitiveC<SoftMax>(prim, inputs, quantType);
#ifdef SUPPORT_TRAIN0
  } else if ((op_type == "ReluGrad" || op_type == "Relu6Grad" || op_type == "SigmoidGrad")) {
    return NewPrimitiveC<ActivationGrad>(prim, inputs, quantType);
  } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad")) {
    return NewPrimitiveC<PoolingGrad>(prim, inputs, quantType);
  } else if (op_type == "Conv2DBackpropFilter") {
    return NewPrimitiveC<Conv2DGradFilter>(prim, inputs, quantType);
  } else if (op_type == "Conv2DBackpropInput") {
    return NewPrimitiveC<Conv2DGradInput>(prim, inputs, quantType);
  } else if (op_type == "BiasAddGrad") {
    return NewPrimitiveC<BiasGrad>(prim, inputs, quantType);
  } else if (op_type == "ApplyMomentum") {
    return NewPrimitiveC<ApplyMomentum>(prim, inputs, quantType);
  } else if (op_type == "BatchNormGrad") {
    return NewPrimitiveC<BNGrad>(prim, inputs, quantType);
#endif
  } else {
    MS_LOG(ERROR) << "Unsupported primitive type in UnPackFromPrimitive : " << op_type;
    return nullptr;

@@ -565,6 +596,32 @@ PrimitiveC *PrimitiveC::UnPackFromSchemaPrimitiveT(mindspore::schema::PrimitiveT
      return new SparseToDense(primitive);
    case schema::PrimitiveType_DetectionPostProcess:
      return new DetectionPostProcess(primitive);

#ifdef SUPPORT_TRAIN
    case schema::PrimitiveType_ActivationGrad:
      return new ActivationGrad(primitive);
    case schema::PrimitiveType_PoolingGrad:
      return new PoolingGrad(primitive);
    case schema::PrimitiveType_Conv2DGradFilter:
      return new Conv2DGradFilter(primitive);
    case schema::PrimitiveType_Conv2DGradInput:
      return new Conv2DGradInput(primitive);
    case schema::PrimitiveType_BiasGrad:
      return new BiasGrad(primitive);
    case schema::PrimitiveType_ApplyMomentum:
      return new ApplyMomentum(primitive);
    case schema::PrimitiveType_BNGrad:
      return new BNGrad(primitive);
    case schema::PrimitiveType_AddGrad:
      return new ArithmeticGrad(primitive);
    case schema::PrimitiveType_SubGrad:
      return new ArithmeticGrad(primitive);
    case schema::PrimitiveType_MulGrad:
      return new ArithmeticGrad(primitive);
    case schema::PrimitiveType_DivGrad:
      return new ArithmeticGrad(primitive);
#endif

    default:
      MS_LOG(ERROR) << "Unsupported primitive type in UnPackFromSchemaPrimitiveT : "
                    << schema::EnumNamePrimitiveType(op_type);

@@ -779,6 +836,31 @@ PrimitiveC *PrimitiveC::UnPackFromSchemaPrimitive(const schema::Primitive *primi
      return NewPrimitiveC<SparseToDense>(primitive);
    case schema::PrimitiveType_DetectionPostProcess:
      return NewPrimitiveC<DetectionPostProcess>(primitive);

#ifdef SUPPORT_TRAIN
    case schema::PrimitiveType_ActivationGrad:
      return NewPrimitiveC<ActivationGrad>(primitive);
    case schema::PrimitiveType_PoolingGrad:
      return NewPrimitiveC<PoolingGrad>(primitive);
    case schema::PrimitiveType_Conv2DGradFilter:
      return NewPrimitiveC<Conv2DGradFilter>(primitive);
    case schema::PrimitiveType_Conv2DGradInput:
      return NewPrimitiveC<Conv2DGradInput>(primitive);
    case schema::PrimitiveType_BiasGrad:
      return NewPrimitiveC<BiasGrad>(primitive);
    case schema::PrimitiveType_ApplyMomentum:
      return NewPrimitiveC<ApplyMomentum>(primitive);
    case schema::PrimitiveType_BNGrad:
      return NewPrimitiveC<BNGrad>(primitive);
    case schema::PrimitiveType_AddGrad:
      return NewPrimitiveC<ArithmeticGrad>(primitive);
    case schema::PrimitiveType_SubGrad:
      return NewPrimitiveC<ArithmeticGrad>(primitive);
    case schema::PrimitiveType_MulGrad:
      return NewPrimitiveC<ArithmeticGrad>(primitive);
    case schema::PrimitiveType_DivGrad:
      return NewPrimitiveC<ArithmeticGrad>(primitive);
#endif
    default:
      MS_LOG(ERROR) << "Unsupported primitive type in UnPackFromSchemaPrimitive : "
                    << schema::EnumNamePrimitiveType(op_type);

@@ -115,7 +115,7 @@ constexpr size_t kInputSize = 1;
constexpr size_t kOutputSize = 1;
} // namespace
int Reduce::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
  if (inputs_.size() != kInputSize || outputs_.size() != kOutputSize) {
  if (inputs_.size() < kInputSize || outputs_.size() != kOutputSize) {
    return RET_ERROR;
  }
  auto input = inputs_.front();

@@ -14,8 +14,8 @@
 * limitations under the License.
 */

#ifndef LITE_MINDSPORE_LITE_C_OPS_RESHAPE_H_
#define LITE_MINDSPORE_LITE_C_OPS_RESHAPE_H_
#ifndef MINDSPORE_LITE_SRC_OPS_RESHAPE_H_
#define MINDSPORE_LITE_SRC_OPS_RESHAPE_H_

#include <vector>
#include <set>

@@ -50,4 +50,4 @@ class Reshape : public PrimitiveC {
} // namespace lite
} // namespace mindspore

#endif  // LITE_MINDSPORE_LITE_C_OPS_RESHAPE_H_
#endif  // MINDSPORE_LITE_SRC_OPS_RESHAPE_H_

@@ -51,5 +51,31 @@ int SoftmaxCrossEntropy::UnPackToFlatBuilder(const schema::Primitive *primitive,
  return RET_OK;
}
#endif

int SoftmaxCrossEntropy::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) {
  if (1 > outputs.size()) {
    MS_LOG(ERROR) << "SoftmaxCrossEntropy should have at least one output";
    return RET_ERROR;
  }
  auto *in0 = inputs.front();
  MS_ASSERT(in0 != nullptr);
  auto *out = outputs.front();
  MS_ASSERT(out != nullptr);

  std::vector<int> outshape;
  outshape.push_back(1);
  out->set_shape(outshape);
  out->set_data_type(in0->data_type());

  if (1 < outputs.size()) {
    auto *grads = outputs.at(1);
    MS_ASSERT(grads != nullptr);
    grads->set_shape(in0->shape());
    grads->set_data_type(in0->data_type());
    grads->SetFormat(in0->GetFormat());
  }
  return RET_OK;
}

} // namespace lite
} // namespace mindspore

@@ -14,8 +14,8 @@
 * limitations under the License.
 */

#ifndef LITE_MINDSPORE_LITE_C_OPS_SOFTMAX_CROSS_ENTROPY_H_
#define LITE_MINDSPORE_LITE_C_OPS_SOFTMAX_CROSS_ENTROPY_H_
#ifndef MINDSPORE_LITE_SRC_OPS_SOFTMAX_CROSS_ENTROPY_H_
#define MINDSPORE_LITE_SRC_OPS_SOFTMAX_CROSS_ENTROPY_H_

#include <vector>
#include <set>

@@ -39,9 +39,11 @@ class SoftmaxCrossEntropy : public PrimitiveC {

  int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
  int InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<lite::tensor::Tensor *> outputs_) override;

  std::vector<int> GetAxis() const;
};
} // namespace lite
} // namespace mindspore

#endif  // LITE_MINDSPORE_LITE_C_OPS_SOFTMAX_CROSS_ENTROPY_H_
#endif  // MINDSPORE_LITE_SRC_OPS_SOFTMAX_CROSS_ENTROPY_H_

@@ -1678,6 +1678,13 @@ PopulateParameterFunc PopulateParameterRegistry::GetParameterFunc(int type) {
  return populate_parameter_funcs_[schema::PrimitiveType(type)];
}

int PopulateParameterRegistry::AddPopulateParameterFunc(const schema::PrimitiveType &type, PopulateParameterFunc func) {
  if ((type < schema::PrimitiveType_MIN) || (type > schema::PrimitiveType_MAX))
    return -1;
  populate_parameter_funcs_[type] = func;
  return 0;
}

OpParameter *PopulateParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";

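AddPopulateParameterFunc above guards a plain function-pointer table indexed by the schema type id. A self-contained sketch of the same pattern, with generic names rather than the MindSpore API:

#include <cstdio>

typedef int (*PopulateFunc)(void);  // stand-in for PopulateParameterFunc

enum { kTypeMin = 0, kTypeMax = 127 };
static PopulateFunc g_funcs[kTypeMax + 1];

// Mirrors AddPopulateParameterFunc: reject ids outside the table, else store.
int AddFunc(int type, PopulateFunc func) {
  if (type < kTypeMin || type > kTypeMax) return -1;
  g_funcs[type] = func;
  return 0;
}

static int Dummy(void) { return 42; }

int main() {
  if (AddFunc(5, Dummy) == 0 && g_funcs[5] != nullptr) {
    std::printf("registered: %d\n", g_funcs[5]());  // prints registered: 42
  }
  return 0;
}
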
@@ -30,12 +30,16 @@ class PopulateParameterRegistry {
  ~PopulateParameterRegistry() = default;

  static PopulateParameterRegistry *GetInstance();
  int AddPopulateParameterFunc(const schema::PrimitiveType &type, PopulateParameterFunc func);
  PopulateParameterFunc GetParameterFunc(int type);

 protected:
  PopulateParameterFunc populate_parameter_funcs_[schema::PrimitiveType_MAX + 1];
};

OpParameter *PopulateActivationParameter(const lite::PrimitiveC *primitive);
OpParameter *PopulateArithmetic(const lite::PrimitiveC *primitive);

OpParameter *PopulateParameter(const mindspore::lite::PrimitiveC *primitive);
} // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_POPULATE_PARAMETER_H_

@@ -37,8 +37,8 @@ constexpr size_t kOutputNum = 1;
} // namespace

int ReduceBaseCPUKernel::CheckInputsOutputs() {
  if (in_tensors_.size() != kInputNum) {
    MS_LOG(ERROR) << "Reduce inputs size should be " << kInputNum << " but got " << in_tensors_.size();
  if (in_tensors_.size() < kInputNum) {
    MS_LOG(ERROR) << "Reduce inputs size should be at least " << kInputNum << " but got " << in_tensors_.size();
    return RET_ERROR;
  }
  if (out_tensors_.size() != kOutputNum) {

@@ -99,7 +99,15 @@ int ReduceBaseCPUKernel::Init() {
  if (reduce_param == nullptr) {
    return RET_NULL_PTR;
  }
  num_axes_ = reduce_param->num_axes_;
  if (in_tensors_.size() > 1) {
    auto axes_ptr = in_tensors_.at(1);
    num_axes_ = axes_ptr->ElementsNum();
    memcpy(axes_, axes_ptr->Data(), axes_ptr->Size());
  } else {
    num_axes_ = reduce_param->num_axes_;
    memcpy(axes_, reduce_param->axes_, sizeof(reduce_param->axes_));
  }

  mode_ = reduce_param->mode_;
  memcpy(axes_, reduce_param->axes_, sizeof(reduce_param->axes_));
  reduce_to_end_ = reduce_param->reduce_to_end_;

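The new Init() above prefers axes supplied at runtime as a second input tensor over axes baked into the op parameter. A minimal sketch of that selection logic, with hypothetical names:

#include <cstdio>
#include <cstring>

// Sketch of the axes-selection in ReduceBaseCPUKernel::Init(): a runtime
// axes tensor, if present, overrides the axes stored in the op parameter.
struct ReduceParamSketch { int num_axes_; int axes_[8]; };

void pick_axes(const int *axes_tensor, int axes_tensor_len,
               const ReduceParamSketch *param, int *axes_out, int *num_out) {
  if (axes_tensor != nullptr) {
    *num_out = axes_tensor_len;
    std::memcpy(axes_out, axes_tensor, axes_tensor_len * sizeof(int));
  } else {
    *num_out = param->num_axes_;
    std::memcpy(axes_out, param->axes_, sizeof(param->axes_));
  }
}

int main() {
  ReduceParamSketch p = {1, {3}};
  int runtime_axes[] = {1, 2};
  int axes[8], n;
  pick_axes(runtime_axes, 2, &p, axes, &n);
  std::printf("n=%d first=%d\n", n, axes[0]);  // n=2 first=1
  return 0;
}
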
@@ -15,6 +15,7 @@
 */

#include "src/runtime/kernel/arm/fp32_grad/activation_grad.h"
#include "nnacl/fp32_grad/activation_grad.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"

@@ -24,41 +25,38 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::ActivationGradType_HSWISH;
using mindspore::schema::ActivationGradType_LEAKY_RELU;
using mindspore::schema::ActivationGradType_RELU;
using mindspore::schema::ActivationGradType_RELU6;
using mindspore::schema::ActivationType_HSWISH;
using mindspore::schema::ActivationType_LEAKY_RELU;
using mindspore::schema::ActivationType_RELU;
using mindspore::schema::ActivationType_RELU6;
using mindspore::schema::PrimitiveType_ActivationGrad;

namespace mindspore::kernel {
int ActivationGradCPUKernel::Init() {
  outputs_[0]->set_shape(inputs_[0]->shape());
  return RET_OK;
}
int ActivationGradCPUKernel::Init() { return RET_OK; }

int ActivationGradCPUKernel::ReSize() { return RET_OK; }

int ActivationGradCPUKernel::DoActivation(int task_id) {
  auto yt_addr = reinterpret_cast<float *>(inputs_.at(0)->Data());
  auto input_addr = reinterpret_cast<float *>(inputs_.at(1)->Data());
  auto output_addr = reinterpret_cast<float *>(outputs_.at(0)->Data());
  auto length = inputs_.at(0)->ElementsNum();
  auto yt_addr = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto input_addr = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
  auto output_addr = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
  int length = in_tensors_.at(0)->ElementsNum();

  auto error_code = RET_OK;

  if (type_ == schema::ActivationGradType_RELU) {
  if (param_act_grad_->type_ == schema::ActivationType_RELU) {
    error_code = ReluGrad(yt_addr, input_addr, length, output_addr);
  } else if (type_ == schema::ActivationGradType_RELU6) {
  } else if (param_act_grad_->type_ == schema::ActivationType_RELU6) {
    error_code = Relu6Grad(yt_addr, input_addr, length, output_addr);
  } else if (type_ == schema::ActivationGradType_LEAKY_RELU) {
    error_code = LReluGrad(yt_addr, input_addr, length, output_addr, alpha_);
  } else if (type_ == schema::ActivationGradType_SIGMOID) {
  } else if (param_act_grad_->type_ == schema::ActivationType_LEAKY_RELU) {
    error_code = LReluGrad(yt_addr, input_addr, length, output_addr, param_act_grad_->alpha_);
  } else if (param_act_grad_->type_ == schema::ActivationType_SIGMOID) {
    error_code = SigmoidGrad(yt_addr, input_addr, length, output_addr);
  } else if (type_ == schema::ActivationGradType_TANH) {
  } else if (param_act_grad_->type_ == schema::ActivationType_TANH) {
    error_code = TanhGrad(yt_addr, input_addr, length, output_addr);
  } else if (type_ == schema::ActivationGradType_HSWISH) {
  } else if (param_act_grad_->type_ == schema::ActivationType_HSWISH) {
    error_code = HSwishGrad(yt_addr, input_addr, length, output_addr);
  } else if (type_ == schema::ActivationGradType_HSIGMOID) {
  } else if (param_act_grad_->type_ == schema::ActivationType_HSIGMOID) {
    error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr);
  } else {
    MS_LOG(ERROR) << "Activation type error";

@@ -81,6 +79,12 @@ int ActivationGradRun(void *cdata, int task_id) {
}

int ActivationGradCPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return ret;
  }

  int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationGradRun, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";

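DoActivation above hands a (dy, forward output) pair to per-type grad routines in nnacl. A hedged sketch of the sigmoid case, assuming src0 carries the incoming gradient and src1 the forward sigmoid output; this mirrors the nnacl calling convention but is not the shipped implementation:

// Elementwise sigmoid backward: dy * y * (1 - y), in the same
// (src0, src1, length, dst) style as the nnacl grad routines.
int SigmoidGradSketch(const float *src0, const float *src1, int length, float *dst) {
  for (int i = 0; i < length; ++i) {
    dst[i] = src0[i] * src1[i] * (1.0f - src1[i]);
  }
  return 0;  // RET_OK
}
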
@@ -20,8 +20,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

#include "nnacl/activation_grad.h"
#include "nnacl/fp32/activation.h"

namespace mindspore::kernel {
class ActivationGradCPUKernel : public LiteKernel {

@@ -30,9 +29,7 @@ class ActivationGradCPUKernel : public LiteKernel {
                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(param, inputs, outputs, ctx, primitive) {
    ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param);
    type_ = param_act_grad->type_;
    alpha_ = param_act_grad->alpha_;
    param_act_grad_ = reinterpret_cast<ActivationParameter *>(param);
  }
  ~ActivationGradCPUKernel() override = default;

@@ -43,9 +40,9 @@ class ActivationGradCPUKernel : public LiteKernel {

 private:
  int thread_count_;
  int type_;
  float alpha_;
  ActivationParameter *param_act_grad_;
};

} // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_ACTIVATION_GRAD_H_

@@ -0,0 +1,105 @@

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32_grad/apply_momentum.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/arm/fp32/nchw2nhwc.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_ApplyMomentum;

namespace mindspore::kernel {

int ApplyMomentumCPUKernel::ReSize() { return RET_OK; }

int ApplyMomentumCPUKernel::Run() {
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail! ret: " << prepare_ret;
    return prepare_ret;
  }

  auto weight = reinterpret_cast<float *>(in_tensors_[0]->Data());
  auto accumulate = reinterpret_cast<float *>(in_tensors_[1]->Data());
  float learning_rate = reinterpret_cast<float *>(in_tensors_[2]->Data())[0];
  auto gradient = reinterpret_cast<float *>(in_tensors_[3]->Data());
  float moment = reinterpret_cast<float *>(in_tensors_[4]->Data())[0];
  size_t elem_num = in_tensors_[0]->ElementsNum();

  // align format
  if (in_tensors_[3]->shape().size() == 4 &&
      in_tensors_[3]->GetFormat() == schema::Format_NCHW &&
      in_tensors_[0]->GetFormat() == schema::Format_KHWC) {
    PackNCHWToNHWCFp32(gradient, workspace, in_tensors_[0]->Batch(), in_tensors_[0]->Height() * in_tensors_[0]->Width(),
                       in_tensors_[0]->Channel());
  } else {
    memcpy(workspace, gradient, in_tensors_[3]->ElementsNum() * sizeof(float));
  }

  for (size_t i = 0; i < elem_num; ++i) {
    accumulate[i] = accumulate[i] * moment + workspace[i];  // * (1.0 - moment);
    weight[i] -= accumulate[i] * learning_rate;
  }
  return RET_OK;
}

int ApplyMomentumCPUKernel::Init() {
  // Only for test with uninitialized Data
  size_t elem_num = in_tensors_[0]->ElementsNum();
  auto accumulate = reinterpret_cast<float *>(in_tensors_[1]->Data());
  for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0f;

  workspace = new float[elem_num];
  return 0;
}
#if 0
OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive) {
  OpParameter *param = new (std::nothrow) OpParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for OptMomentum failed.";
    return nullptr;
  }
  param->type_ = primitive->Type();
  return param;
}
#endif

kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *opParameter, const lite::Context *ctx,
                                                      const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) {
  MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum);
  auto *kernel = new (std::nothrow) ApplyMomentumCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  MS_ASSERT(kernel != nullptr);

  auto ret = kernel->Init();
  if (0 != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ApplyMomentum, CpuApplyMomentumFp32KernelCreator)
} // namespace mindspore::kernel

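The inner loop in Run() above implements classic (non-Nesterov) momentum SGD. A minimal self-contained sketch of the update rule:

#include <cstdio>

// Sketch of the update in ApplyMomentumCPUKernel::Run():
//   accum <- accum * moment + grad;  weight <- weight - lr * accum
void apply_momentum(float *weight, float *accum, const float *grad,
                    float lr, float moment, int n) {
  for (int i = 0; i < n; ++i) {
    accum[i] = accum[i] * moment + grad[i];
    weight[i] -= accum[i] * lr;
  }
}

int main() {
  float w[1] = {1.0f}, a[1] = {0.0f}, g[1] = {0.5f};
  apply_momentum(w, a, g, /*lr=*/0.1f, /*moment=*/0.9f, 1);
  std::printf("w=%f a=%f\n", w[0], a[0]);  // w=0.950000 a=0.500000
  return 0;
}
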
@@ -14,28 +14,32 @@
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_APPLY_MOMENTUM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_APPLY_MOMENTUM_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

namespace mindspore::kernel {
class OptMomentumCPUKernel : public LiteKernel {
class ApplyMomentumCPUKernel : public LiteKernel {
 public:
  explicit OptMomentumCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
  explicit ApplyMomentumCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                                  const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                  const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~OptMomentumCPUKernel() override {}
  ~ApplyMomentumCPUKernel() override { delete[] workspace; }

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  float *workspace;
};

// OpParameter *PopulateApplyMomentumParameter(const lite::Primitive *primitive);

} // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_OPT_MOMENTUM_H_
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_APPLY_MOMENTUM_H_

@@ -14,11 +14,11 @@
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "nnacl/fp32_grad/reduce_grad.h"
#include "nnacl/fp32_grad/arithmetic_grad.h"
#include "src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;

@@ -33,108 +33,41 @@ constexpr int kArithGradOpOutputNum = 2;
} // namespace

int ArithmeticGradCPUKernel::Init() {
  auto ret = InferShape();
  return ret;
}
  auto dx1 = out_tensors_[0];
  auto dx2 = out_tensors_[1];

int ArithmeticGradCPUKernel::InferShape() {
  if (inputs_.size() != kArithGradOpInputNum) {
    MS_LOG(ERROR) << "The number of input must be " << kArithGradOpInputNum;
    return RET_ERROR;
  }
  if (outputs_.size() != kArithGradOpOutputNum) {
    MS_LOG(ERROR) << "The number of output must be " << kArithGradOpOutputNum;
    return RET_ERROR;
  }
  auto dy = inputs_[0];
  auto x1 = inputs_[1];
  auto x2 = inputs_[2];
  auto dx1 = outputs_[0];
  auto dx2 = outputs_[1];

  MS_ASSERT(dy != nullptr);
  MS_ASSERT(x1 != nullptr);
  MS_ASSERT(x2 != nullptr);
  MS_ASSERT(dx1 != nullptr);
  MS_ASSERT(dx2 != nullptr);

  auto inShape0 = x1->shape();
  auto inShape1 = x2->shape();
  auto outShape = dy->shape();

  if ((type() == PrimitiveType_AddGrad) || (type() == PrimitiveType_SubGrad)) {
    arithmeticParameter_->ndim_ = outShape.size();
    auto fillDimNum0 = outShape.size() - inShape0.size();
    auto fillDimNum1 = outShape.size() - inShape1.size();
    int j0 = 0;
    int j1 = 0;
    for (unsigned int i = 0; i < outShape.size(); i++) {
      arithmeticParameter_->in_shape0_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++];
      arithmeticParameter_->in_shape1_[i] = (i < fillDimNum1) ? 1 : inShape1[j1++];
      arithmeticParameter_->out_shape_[i] = outShape[i];
    }
  } else {
    if ((Type() == PrimitiveType_MulGrad) || (Type() == PrimitiveType_DivGrad)) {
      // if (inShape0.size() < inShape1.size())
      if (dx1->ElementsNum() < dx2->ElementsNum()) {
        arithmeticParameter_->ndim_ = inShape1.size();
        if (type() == PrimitiveType_MulGrad)
        if (Type() == PrimitiveType_MulGrad)
          arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul2L;
        else if (type() == PrimitiveType_DivGrad)
        else if (Type() == PrimitiveType_DivGrad)
          arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv2L;

        auto fillDimNum = inShape1.size() - inShape0.size();  // This will not work for batch!
        int j = 0;
        for (unsigned int i = 0; i < inShape1.size(); i++) {
          if (i < fillDimNum) {
            arithmeticParameter_->in_shape1_[i] = 1;
          } else {
            arithmeticParameter_->in_shape1_[i] = inShape0[j++];
          }
          arithmeticParameter_->in_shape0_[i] = inShape1[i];
          arithmeticParameter_->out_shape_[i] = outShape[i];
        }
      } else if (dx2->ElementsNum() < dx1->ElementsNum()) {  // if (inShape0.size() > inShape1.size())
        arithmeticParameter_->ndim_ = inShape0.size();
        if (type() == PrimitiveType_MulGrad)
        if (Type() == PrimitiveType_MulGrad)
          arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul1L;
        else if (type() == PrimitiveType_DivGrad)
        else if (Type() == PrimitiveType_DivGrad)
          arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv1L;
        arithmeticParameter_->broadcasting_ = true;
        arithmeticParameter_->ndim_ = inShape0.size();
        int j = 0;
        auto fillDimNum = inShape0.size() - inShape1.size();
        for (unsigned int i = 0; i < inShape0.size(); i++) {
          if (i < fillDimNum) {
            arithmeticParameter_->in_shape1_[i] = 1;
          } else {
            arithmeticParameter_->in_shape1_[i] = inShape1[j++];
          }
          arithmeticParameter_->in_shape0_[i] = inShape0[i];
          arithmeticParameter_->out_shape_[i] = outShape[i];
        }
      } else {
        arithmeticParameter_->broadcasting_ = false;
        for (unsigned int i = 0; i < inShape0.size(); i++) {
          arithmeticParameter_->in_shape1_[i] = inShape1[i];
          arithmeticParameter_->in_shape0_[i] = inShape0[i];
          arithmeticParameter_->out_shape_[i] = outShape[i];
        }
      }

  tile_data0 = new (std::nothrow) float[inputs_.at(0)->ElementsNum()];
  tile_data0 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
  if (tile_data0 == nullptr) {
    MS_LOG(ERROR) << "new data0 fail!";
    return RET_ERROR;
  }
  tile_data1 = new (std::nothrow) float[inputs_.at(0)->ElementsNum()];
  tile_data1 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
  if (tile_data1 == nullptr) {
    MS_LOG(ERROR) << "new data1 fail!";
    delete tile_data0;
    return RET_ERROR;
  }

  if (type() == PrimitiveType_DivGrad) {
    tile_data2 = new (std::nothrow) float[inputs_.at(0)->ElementsNum()];
  if (Type() == PrimitiveType_DivGrad) {
    tile_data2 = new (std::nothrow) float[in_tensors_.at(0)->ElementsNum()];
    if (tile_data2 == nullptr) {
      MS_LOG(ERROR) << "new data2 fail!";
      delete tile_data0;

@@ -144,10 +77,6 @@ int ArithmeticGradCPUKernel::InferShape() {
  }
}

  dx1->set_shape(x1->shape());
  dx2->set_shape(x2->shape());
  dx1->set_data_type(dy->data_type());
  dx2->set_data_type(dy->data_type());
  return RET_OK;
}

@@ -187,16 +116,16 @@ void ArithmeticGradCPUKernel::ArithmeticGradSub(float *dy, int dy_size, float *d

void ArithmeticGradCPUKernel::ArithmeticGradMul(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                int dx2_size) {
  auto x1_data = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(in_tensors_[2]->Data());
  ElementMul(dy, x1_data, dx2, dy_size);
  ElementMul(dy, x2_data, dx1, dy_size);
}

void ArithmeticGradCPUKernel::ArithmeticGradMul1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                  int dx2_size) {
  auto x1_data = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(in_tensors_[2]->Data());
  ElementMul(dy, x1_data, tile_data0, dy_size);
  ReduceSumByAxes(tile_data0, arithmeticParameter_->in_shape0_, dx2, arithmeticParameter_->in_shape1_,
                  arithmeticParameter_->ndim_);

@@ -206,8 +135,8 @@ void ArithmeticGradCPUKernel::ArithmeticGradMul1L(float *dy, int dy_size, float

void ArithmeticGradCPUKernel::ArithmeticGradMul2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                  int dx2_size) {
  auto x1_data = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(in_tensors_[2]->Data());
  ElementMul(dy, x2_data, tile_data0, dy_size);
  ReduceSumByAxes(tile_data0, arithmeticParameter_->in_shape0_, dx1, arithmeticParameter_->in_shape1_,
                  arithmeticParameter_->ndim_);

@@ -217,16 +146,16 @@ void ArithmeticGradCPUKernel::ArithmeticGradMul2L(float *dy, int dy_size, float

void ArithmeticGradCPUKernel::ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                int dx2_size) {
  auto x1 = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2 = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1 = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2 = reinterpret_cast<float *>(in_tensors_[2]->Data());
  ElementDiv(dy, x2, dx1, dy_size);
  ElementMulAndDivNegSquare(dy, x1, x2, dx2, dy_size);
}

void ArithmeticGradCPUKernel::ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                  int dx2_size) {
  auto x1_data = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(in_tensors_[2]->Data());

  ElementMul(x2_data, x2_data, dx2, dx2_size);
  ElementMul(x1_data, dy, dx1, dy_size);  // use dx1 buffer

@@ -243,8 +172,8 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv1L(float *dy, int dy_size, float

void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
                                                  int dx2_size) {
  auto x1_data = reinterpret_cast<float *>(inputs_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(inputs_[2]->Data());
  auto x1_data = reinterpret_cast<float *>(in_tensors_[1]->Data());
  auto x2_data = reinterpret_cast<float *>(in_tensors_[2]->Data());

  // dx1 = dy/x2
  ElementDiv(dy, x2_data, tile_data0, dy_size);  // first multiply into temp

@@ -259,13 +188,13 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float
int ArithmeticGradCPUKernel::ReSize() { return RET_OK; }

int ArithmeticGradCPUKernel::Run() {
  auto dy = reinterpret_cast<float *>(inputs_[0]->Data());
  auto dx1 = reinterpret_cast<float *>(outputs_[0]->Data());
  auto dx2 = reinterpret_cast<float *>(outputs_[1]->Data());
  auto dy = reinterpret_cast<float *>(in_tensors_[0]->Data());
  auto dx1 = reinterpret_cast<float *>(out_tensors_[0]->Data());
  auto dx2 = reinterpret_cast<float *>(out_tensors_[1]->Data());

  size_t dy_size = inputs_.at(0)->ElementsNum();
  size_t dx1_size = outputs_.at(0)->ElementsNum();
  size_t dx2_size = outputs_[1]->ElementsNum();
  size_t dy_size = in_tensors_.at(0)->ElementsNum();
  size_t dx1_size = out_tensors_.at(0)->ElementsNum();
  size_t dx2_size = out_tensors_[1]->ElementsNum();
  (this->*arithmetic_grad_)(dy, dy_size, dx1, dx1_size, dx2, dx2_size);
  return RET_OK;
}

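For AddGrad/SubGrad with broadcasting, the gradient of the broadcast input is dy summed over the broadcast axes, which is what ReduceSumByAxes performs above. A one-dimensional sketch of the idea:

#include <cstdio>

// Sketch: gradient of a broadcast add. If x2 (size 1) was broadcast across
// dy (size n), dx2 is the sum of dy; dx1 is dy itself (SubGrad would negate dx2).
void add_grad_broadcast(const float *dy, int n, float *dx1, float *dx2) {
  *dx2 = 0.0f;
  for (int i = 0; i < n; ++i) {
    dx1[i] = dy[i];
    *dx2 += dy[i];
  }
}

int main() {
  float dy[3] = {0.1f, 0.2f, 0.3f}, dx1[3], dx2;
  add_grad_broadcast(dy, 3, dx1, &dx2);
  std::printf("dx2=%f\n", dx2);  // dx2=0.600000
  return 0;
}
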
@@ -40,7 +40,7 @@ class ArithmeticGradCPUKernel : public LiteKernel {
                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
    switch (type()) {
    switch (Type()) {
      case PrimitiveType_MulGrad:
        arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul;  // this will be adjusted in InferShape
        break;

@@ -27,33 +27,9 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_BiasGrad;

namespace mindspore::kernel {
int BiasGradCPUKernel::InferShape() {
  if (1 != this->inputs_.size()) {
    MS_LOG(ERROR) << "BiasGrad should have one input";
    return RET_ERROR;
  }
  if (1 != this->outputs_.size()) {
    MS_LOG(ERROR) << "BiasGrad should have one output";
    return RET_ERROR;
  }
  auto *in0 = inputs_.front();
  auto *out = outputs_.front();
  MS_ASSERT(in0 != nullptr);
  MS_ASSERT(out != nullptr);
  auto inshape = in0->shape();
  int ndim = inshape.size();
  for (int i = 0; i < ndim - 1; i++) {
    inshape[i] = 1;
  }
  out->set_shape(inshape);
  out->set_data_type(in0->data_type());
  return RET_OK;
}

int BiasGradCPUKernel::Init() {
  MS_ASSERT(InferShape() == RET_OK);

  auto dims = inputs_[0]->shape();
  auto dims = in_tensors_[0]->shape();
  bias_param->ndim_ = dims.size();
  for (unsigned int i = 0; i < bias_param->ndim_; i++) {
    bias_param->in_shape0_[i] = dims[i];

@@ -75,8 +51,8 @@ int BiasGradCPUKernel::Run() {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }
  auto in = reinterpret_cast<float *>(inputs_.at(0)->Data());
  auto out = reinterpret_cast<float *>(outputs_.at(0)->Data());
  auto in = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data());

  size_t nhw_size = 1;
  size_t channels = bias_param->in_shape0_[bias_param->ndim_ - 1];  // C in NHWC

@@ -14,8 +14,8 @@
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BIAS_GRAD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BIAS_GRAD_H_

#include <vector>
#include "src/lite_kernel.h"

@@ -35,7 +35,6 @@ class BiasGradCPUKernel : public LiteKernel {
  ~BiasGradCPUKernel() override = default;

  int Init() override;
  int InferShape();
  int ReSize() override;
  int Run() override;

@@ -44,4 +43,4 @@ class BiasGradCPUKernel : public LiteKernel {
};
} // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_GRAD_H_
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BIAS_GRAD_H_

@@ -14,11 +14,11 @@
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
#include <algorithm>
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
#include "nnacl/fp32_grad/batch_norm.h"
#include "include/errorcode.h"


@@ -27,79 +27,103 @@ using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
// using mindspore::lite::REG_OP;
using mindspore::schema::PrimitiveType_BNGradInput;
using mindspore::schema::PrimitiveType_BNGrad;

/*
   {dy}
   {x }
   {scale }
   {save_mean }
   {save_inv_variance }
*/
namespace mindspore::kernel {
int BNGradInputCPUKernel::Init() {
  auto bn_param = reinterpret_cast<bnParameter *>(opParameter);
  workspace_size = 5 * bn_param->channels;
  workspace = new (std::nothrow) float[workspace_size];

#if 0
OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive) {
  BNGradParameter *param = new (std::nothrow) BNGradParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for conv grad filter failed.";
    return nullptr;
  }
  param->op_parameter_.type_ = primitive->Type();

  auto bngrad_primitive = primitive->Value()->value_as_BNGrad();
  param->epsilon_ = bngrad_primitive->eps();
  param->momentum_ = bngrad_primitive->momentum();
  return reinterpret_cast<OpParameter *>(param);
}
#endif
int BNGradCPUKernel::Init() {
  auto *input_x = in_tensors_.at(1);
  int channels = input_x->shape().at(kNHWC_C);
  workspace_size = 5 * channels;
  workspace = new (std::nothrow) float[workspace_size];
  if (workspace == nullptr) {
    MS_LOG(ERROR) << "new workspace fail!";
    return RET_ERROR;
  }

  if (2 != this->inputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has 2 inputs";
    return RET_ERROR;
  }
  if (1 != this->outputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has one output";
    return RET_ERROR;
  }
  auto *input_tensor = inputs_.at(0);
  auto *out_tensor = outputs_.at(0);
  auto in_shape = input_tensor->shape();
  out_tensor->set_shape(in_shape);
  out_tensor->set_data_type(input_tensor->data_type());
  return RET_OK;
}

int BNGradInputCPUKernel::ReSize() { return RET_OK; }
int BNGradCPUKernel::ReSize() { return RET_OK; }

int BNGradInputCPUKernel::Run() {
  auto *input_x = inputs_.at(0);
  auto *input_yt = inputs_.at(1);
  auto *input_scale = inputs_.at(2);
  auto *output_grad = outputs_.at(0);
  auto bn_param = reinterpret_cast<bnParameter *>(opParameter);
  int batch = bn_param->batch;
  int channels = bn_param->channels;
  int spatial = bn_param->spatial;
  float eps = bn_param->eps;
int BNGradCPUKernel::Run() {
  // std::cout << "run succ" << std::endl;
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail! ret: " << prepare_ret;
    return prepare_ret;
  }
  auto bn_param = reinterpret_cast<BNGradParameter *>(op_parameter_);
  auto *input_yt = in_tensors_.at(0);
  auto *input_x = in_tensors_.at(1);
  auto *input_scale = in_tensors_.at(2);
  auto *output_dx = out_tensors_.at(0);
  auto *output_scale = out_tensors_.at(1);
  auto *output_bias = out_tensors_.at(2);
  // Tensor *bias = input[5];
  int batch = input_x->Batch();
  int channels = input_x->Channel();
  int spatial = input_x->Height() * input_x->Width();
  float eps = bn_param->epsilon_;
  std::fill(workspace, workspace + workspace_size, 0.f);

  float *mean = workspace;
  float *variance = mean + channels;
  float *mean_delta = variance + channels;
  float *invar = mean + channels;
  float *mean_delta = invar + channels;
  float *variance_delta = mean_delta + channels;
  float *mean_add_delta = variance_delta + channels;

  float *x = reinterpret_cast<float *>(input_x->Data());
  float *yt = reinterpret_cast<float *>(input_yt->Data());
  float *scale = reinterpret_cast<float *>(input_scale->Data());
  float *out = reinterpret_cast<float *>(output_grad->Data());
  float *dx = reinterpret_cast<float *>(output_dx->Data());
  float *dscale = reinterpret_cast<float *>(output_scale->Data());
  float *dbias = reinterpret_cast<float *>(output_bias->Data());

  std::copy(yt, yt + batch * channels * spatial, out);
  meanVar(x, batch, spatial, channels, mean, variance);
  scaleBias(scale, batch, channels, spatial, out);
  meanDelta(out, spatial, channels, eps, variance, mean_delta);
  varianceDelta(x, out, mean, variance, batch, channels, spatial, eps, variance_delta);
  std::copy(yt, yt + batch * channels * spatial, dx);
  meanVar(x, batch, spatial, channels, eps, mean, invar);
  scaleBias(scale, batch, channels, spatial, dx);
  meanDelta(dx, spatial, channels, invar, mean_delta);
  varianceDelta(x, dx, mean, invar, batch, channels, spatial, variance_delta);
  meanAdd(x, mean, variance_delta, batch, channels, spatial, mean_add_delta, mean_delta);
  NormalizeDelta(x, mean, variance, mean_delta, variance_delta, batch, channels, eps, spatial, out);
  NormalizeDelta(x, mean, invar, mean_delta, variance_delta, batch, channels, spatial, dx);
  // dbias
  sumSpatialBatch(yt, batch * spatial, channels, dbias);
  // dscale
  backwardScale(x, mean, invar, yt, batch, channels, spatial, dscale);
  return RET_OK;
}

kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                    const std::vector<lite::tensor::Tensor *> &outputs,
                                                    OpParameter *opParameter, const lite::Context *ctx,
                                                    const kernel::KernelKey &desc,
                                                    const mindspore::lite::PrimitiveC *primitive) {
kernel::LiteKernel *CpuBNGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc,
                                               const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput);
  auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  MS_ASSERT(desc.type == schema::PrimitiveType_BNGrad);
  auto *kernel = new (std::nothrow) BNGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new BNGradInputCPUKernel fail!";
    MS_LOG(ERROR) << "new BNGradCPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();

@@ -112,5 +136,5 @@ kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tens
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BNGradInput, CpuBNGradInputFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BNGrad, CpuBNGradFp32KernelCreator)
} // namespace mindspore::kernel

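For reference, the helper chain in BNGradCPUKernel::Run() (meanVar, scaleBias, meanDelta, varianceDelta, meanAdd, NormalizeDelta, sumSpatialBatch, backwardScale) assembles the standard per-channel batch-norm backward pass. In the usual notation, with N = batch * spatial elements per channel and \hat{x}_i = (x_i - \mu) / \sqrt{\sigma^2 + \epsilon}, the three outputs correspond to:

  d\beta = \sum_i dy_i
  d\gamma = \sum_i dy_i \, \hat{x}_i
  dx_i = \frac{\gamma}{\sqrt{\sigma^2 + \epsilon}} \Big( dy_i - \frac{1}{N} \sum_j dy_j - \frac{\hat{x}_i}{N} \sum_j dy_j \, \hat{x}_j \Big)

These are the textbook formulas, given as a cross-check; the exact factoring inside the nnacl helpers may differ.
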
@@ -14,21 +14,25 @@
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BNGRAD_INPUT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BNGRAD_INPUT_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"


namespace mindspore::kernel {
class BNGradInputCPUKernel : public LiteKernel {



class BNGradCPUKernel : public LiteKernel {
 public:
  explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
  explicit BNGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                           const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~BNGradInputCPUKernel() override { delete workspace; }
  ~BNGradCPUKernel() override { delete[] workspace; }

  int Init() override;
  int ReSize() override;

@@ -38,5 +42,8 @@ class BNGradInputCPUKernel : public LiteKernel {
  float *workspace;
  int workspace_size;
};

// OpParameter *PopulateBNGradParameter(const lite::Primitive *primitive);

} // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BNGRAD_INPUT_H_
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_

@@ -0,0 +1,121 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
#include "nnacl/fp32_grad/pack_ext.h"
#include "nnacl/fp32_grad/gemm.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
int ConvolutionTrainCPUKernel::Init() {
  auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
  auto *input_x = in_tensors_.at(kInputIndex);
  auto *input_weight = in_tensors_.at(kWeightIndex);
  auto *out_y = out_tensors_.at(kOutputIndex);

  conv_param_->output_batch_ = out_y->shape().at(kNHWC_N);
  conv_param_->input_batch_ = input_x->shape().at(kNHWC_N);
  conv_param_->input_h_ = input_x->shape().at(kNHWC_H);
  conv_param_->input_w_ = input_x->shape().at(kNHWC_W);
  conv_param_->output_h_ = out_y->shape().at(kNHWC_H);
  conv_param_->output_w_ = out_y->shape().at(kNHWC_W);
  conv_param_->input_channel_ = input_x->shape().at(kNHWC_C);
  conv_param_->output_channel_ = input_weight->shape().at(kNHWC_N);
  conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
  conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);

  int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ *
                conv_param_->input_channel_ / conv_param_->group_;

  workspace = new float[ws_size];
  return RET_OK;
}

int ConvolutionTrainCPUKernel::ReSize() { return RET_OK; }

int ConvolutionTrainCPUKernel::Run() {
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail! ret: " << prepare_ret;
    return prepare_ret;
  }
  auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
  auto *input_x = in_tensors_.at(kInputIndex);
  auto *input_w = in_tensors_.at(kWeightIndex);
  auto *out_y = out_tensors_.at(kOutputIndex);

  auto x_addr = reinterpret_cast<float *>(input_x->Data());
  auto y_addr = reinterpret_cast<float *>(out_y->Data());
  auto w_addr = reinterpret_cast<float *>(input_w->Data());

  int i, j;
  int nweights = input_w->ElementsNum();
  int in_ch = conv_param_->input_channel_;
  int in_h = conv_param_->input_h_;
  int in_w = conv_param_->input_w_;
  int k_h = conv_param_->kernel_h_;
  int k_w = conv_param_->kernel_w_;
  int batch = conv_param_->output_batch_;
  int out_ch = conv_param_->output_channel_;  // out_y->shape()[3];
  int groups = conv_param_->group_;
  int out_h = conv_param_->output_h_;
  int out_w = conv_param_->output_w_;
  int m = out_h * out_w;
  int n = out_ch / groups;
  int k = k_h * k_w * in_ch / groups;

  memset(y_addr, 0, out_y->Size());

  for (i = 0; i < batch; ++i) {
    for (j = 0; j < groups; ++j) {
      float *mat_a = workspace;
      float *mat_b = w_addr + j * nweights / groups;
      float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups);
      float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups);
      im2col_hwc(im, mat_a, conv_param_);
      gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch);
    }
  }

  // std::cout << "run succ" << std::endl;
  return RET_OK;
}

kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
                                                  const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);

  auto *kernel = new (std::nothrow) ConvolutionTrainCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  MS_ASSERT(kernel != nullptr);

  auto ret = kernel->Init();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

} // namespace mindspore::kernel

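Run() above lowers the forward convolution to im2col followed by one GEMM per (batch, group) pair, with B transposed (the second argument of gemm() is 1). For a sanity check of the m/n/k bookkeeping, a standalone sketch with example dimensions (values hypothetical):

#include <cstdio>

// Sketch: GEMM dimensions for the im2col-lowered forward conv above.
// C[m x n] = A[m x k] * B[n x k]^T, with A the im2col buffer and B the weights.
int main() {
  int out_h = 14, out_w = 14, k_h = 3, k_w = 3;
  int in_ch = 16, out_ch = 32, groups = 1;
  int m = out_h * out_w;               // one output pixel per GEMM row
  int n = out_ch / groups;             // output channels in this group
  int k = k_h * k_w * in_ch / groups;  // receptive-field elements per pixel
  std::printf("m=%d n=%d k=%d\n", m, n, k);  // m=196 n=32 k=144
  return 0;
}
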
@@ -0,0 +1,47 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_CONVOLUTION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_CONVOLUTION_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

namespace mindspore::kernel {
class ConvolutionTrainCPUKernel : public LiteKernel {
 public:
  explicit ConvolutionTrainCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                                     const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                     const lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~ConvolutionTrainCPUKernel() override { delete[] workspace; }

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  float *workspace;
};

kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
                                                  const kernel::KernelKey &desc, const lite::PrimitiveC *primitive);
} // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_CONVOLUTION_H_

@@ -33,30 +33,24 @@ int ConvolutionGradFilterCPUKernel::Init() {
  // x is in input 1
  // dw is output 0

  if (2 != this->inputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has 2 inputs";
    return RET_ERROR;
  }
  if (1 != this->outputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has one output";
    return RET_ERROR;
  }

  auto *input_tensor = inputs_.at(1);
  MS_ASSERT(input_tensor != nullptr);
  auto *dy = inputs_.at(0);
  MS_ASSERT(dy != nullptr);
  auto *weight_tensor = outputs_.at(0);
  auto *x_tensor = in_tensors_.at(1);
  MS_ASSERT(x_tensor != nullptr);
  auto *dy_tensor = in_tensors_.at(0);
  MS_ASSERT(dy_tensor != nullptr);
  auto *weight_tensor = out_tensors_.at(0);
  MS_ASSERT(weight_tensor != nullptr);

  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
  conv_param->output_batch_ = this->inputs_.at(0)->shape().at(kNHWC_N);
  conv_param->input_batch_ = this->inputs_.at(1)->shape().at(kNHWC_N);
  conv_param->input_h_ = this->inputs_.at(1)->shape().at(kNHWC_H);
  conv_param->input_w_ = this->inputs_.at(1)->shape().at(kNHWC_W);
  // assume OutCh|kh|kw|In
  conv_param->input_channel_ = this->inputs_.at(1)->shape().at(kNHWC_C);
  conv_param->output_channel_ = this->outputs_.at(0)->shape().at(kNHWC_N);
  auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
  conv_param->output_batch_ = dy_tensor->shape().at(kNHWC_N);
  conv_param->input_batch_ = x_tensor->shape().at(kNHWC_N);
  conv_param->input_h_ = x_tensor->shape().at(kNHWC_H);
  conv_param->input_w_ = x_tensor->shape().at(kNHWC_W);
  // assume OutCh|kh|kw|InCh
  conv_param->input_channel_ = x_tensor->shape().at(kNHWC_C);
  conv_param->output_channel_ = dy_tensor->shape().at(kNHWC_C);
  // TBD
  conv_param->output_h_ = dy_tensor->shape()[kNHWC_H];
  conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];

  int ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ *
                conv_param->input_channel_ / conv_param->group_;

@@ -67,34 +61,21 @@ int ConvolutionGradFilterCPUKernel::Init() {
    return RET_ERROR;
  }

  int output_w = 0;
  int output_h = 0;
  output_h = dy->shape()[kNHWC_H];
  output_w = dy->shape()[kNHWC_W];

  std::vector<int> out_shape(4);
  out_shape.at(0) = conv_param->output_channel_;
  out_shape.at(1) = conv_param->kernel_h_;
  out_shape.at(2) = conv_param->kernel_w_;
  out_shape.at(3) = conv_param->input_channel_ / conv_param->group_;

  // weight is output
  weight_tensor->set_shape(out_shape);
  weight_tensor->set_data_type(input_tensor->data_type());

  conv_param->output_h_ = output_h;
  conv_param->output_w_ = output_w;

  return RET_OK;
}

int ConvolutionGradFilterCPUKernel::ReSize() { return 0; }
int ConvolutionGradFilterCPUKernel::ReSize() { return RET_OK; }

int ConvolutionGradFilterCPUKernel::Run() {
  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
  auto *input_dy = inputs_.at(0);
  auto *input_x = inputs_.at(1);
  auto *out_dw = outputs_.at(0);
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
    return prepare_ret;
  }
  auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
  auto *input_dy = in_tensors_.at(0);
  auto *input_x = in_tensors_.at(1);
  auto *out_dw = out_tensors_.at(0);

  auto x_addr = reinterpret_cast<float *>(input_x->Data());
  auto dy_addr = reinterpret_cast<float *>(input_dy->Data());

@@ -135,7 +116,48 @@ int ConvolutionGradFilterCPUKernel::Run() {
  // std::cout << "run succ" << std::endl;
  return RET_OK;
}
#if 0
OpParameter *PopulateConvolutionGradFilterParameter(const lite::Primitive *primitive) {
  ConvParameter *param = new (std::nothrow) ConvParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for conv grad filter failed.";
    return nullptr;
  }
  param->op_parameter_.type_ = primitive->Type();

  auto convg_primitive = primitive->Value()->value_as_Conv2DGradFilter();
  param->kernel_h_ = convg_primitive->kernelH();
  param->kernel_w_ = convg_primitive->kernelW();
  param->stride_h_ = convg_primitive->strideH();
  param->stride_w_ = convg_primitive->strideW();
  param->dilation_h_ = convg_primitive->dilateH();
  param->dilation_w_ = convg_primitive->dilateW();
  param->pad_h_ = convg_primitive->padUp();
  param->pad_w_ = convg_primitive->padLeft();
  param->pad_u_ = convg_primitive->padUp();
  param->pad_d_ = convg_primitive->padDown();
  param->pad_l_ = convg_primitive->padLeft();
  param->pad_r_ = convg_primitive->padRight();
  param->group_ = convg_primitive->group();
  auto act_type = convg_primitive->activationType();
  switch (act_type) {
    case schema::ActivationType_RELU:
      param->is_relu_ = true;
      param->is_relu6_ = false;
      break;
    case schema::ActivationType_RELU6:
      param->is_relu_ = false;
      param->is_relu6_ = true;
      break;
    default:
      param->is_relu_ = false;
      param->is_relu6_ = false;
      break;
  }

  return reinterpret_cast<OpParameter *>(param);
}
#endif
kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                       const std::vector<lite::tensor::Tensor *> &outputs,
                                                       OpParameter *opParameter, const lite::Context *ctx,
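For orientation: the workspace sized in Init() above matches the usual im2col buffer for a filter-gradient GEMM, one kernel_h * kernel_w * (input_channel / group) column per dy spatial position. Conceptually (a sketch of the data layout only, not the actual nnacl call sequence):

  // im2col(x) : [output_h * output_w, kh * kw * ic / group]
  // dy        : [output_h * output_w, oc]
  // dw = dy^T * im2col(x)   // shape [oc, kh * kw * ic / group], matching out_shape above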
@@ -1,4 +1,4 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");

@@ -28,15 +28,17 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel {
                                       const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                       const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~ConvolutionGradFilterCPUKernel() override { delete workspace; }
  ~ConvolutionGradFilterCPUKernel() override { delete [] workspace; }

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  float *workspace;
  float *workspace = nullptr;
};

}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_CONVOLUTION_GRAD_FILTER_H_
@@ -29,23 +29,14 @@ using mindspore::schema::PrimitiveType_Conv2DGradInput;

namespace mindspore::kernel {
int ConvolutionGradInputCPUKernel::Init() {
  if (2 != this->inputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has 2 inputs";
    return RET_ERROR;
  }
  if (1 != this->outputs_.size()) {
    MS_LOG(ERROR) << "Conv2d Grad should has one output";
    return RET_ERROR;
  }

  auto *dy_tensor = inputs_.at(kInputIndex);
  auto *dy_tensor = in_tensors_.at(kInputIndex);
  MS_ASSERT(dy_tensor != nullptr);
  auto *weight_tensor = inputs_.at(kWeightIndex);
  auto *weight_tensor = in_tensors_.at(kWeightIndex);
  MS_ASSERT(weight_tensor != nullptr);
  auto *dx_tensor = outputs_.at(kOutputIndex);
  auto *dx_tensor = out_tensors_.at(kOutputIndex);
  MS_ASSERT(dx_tensor != nullptr);

  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
  auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
  conv_param->output_batch_ = dx_tensor->shape()[(kNHWC_N)];
  conv_param->input_batch_ = dy_tensor->shape()[(kNHWC_N)];

@@ -74,10 +65,16 @@ int ConvolutionGradInputCPUKernel::Init() {
int ConvolutionGradInputCPUKernel::ReSize() { return 0; }

int ConvolutionGradInputCPUKernel::Run() {
  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
  auto *input_dy = inputs_.at(0);
  auto *input_w = inputs_.at(1);
  auto *out_dx = outputs_.at(0);
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
    return prepare_ret;
  }

  auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
  auto *input_dy = in_tensors_.at(0);
  auto *input_w = in_tensors_.at(1);
  auto *out_dx = out_tensors_.at(0);

  auto dy_addr = reinterpret_cast<float *>(input_dy->Data());
  auto w_addr = reinterpret_cast<float *>(input_w->Data());

@@ -116,6 +113,49 @@ int ConvolutionGradInputCPUKernel::Run() {
  return 0;
}

#if 0
OpParameter *PopulateConvolutionGradInputParameter(const lite::Primitive *primitive) {
  ConvParameter *param = new (std::nothrow) ConvParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for conv grad input failed.";
    return nullptr;
  }
  param->op_parameter_.type_ = primitive->Type();

  auto convg_primitive = primitive->Value()->value_as_Conv2DGradInput();
  param->kernel_h_ = convg_primitive->kernelH();
  param->kernel_w_ = convg_primitive->kernelW();
  param->stride_h_ = convg_primitive->strideH();
  param->stride_w_ = convg_primitive->strideW();
  param->dilation_h_ = convg_primitive->dilateH();
  param->dilation_w_ = convg_primitive->dilateW();
  param->pad_h_ = convg_primitive->padUp();
  param->pad_w_ = convg_primitive->padLeft();
  param->pad_u_ = convg_primitive->padUp();
  param->pad_d_ = convg_primitive->padDown();
  param->pad_l_ = convg_primitive->padLeft();
  param->pad_r_ = convg_primitive->padRight();
  param->group_ = convg_primitive->group();
  auto act_type = convg_primitive->activationType();
  switch (act_type) {
    case schema::ActivationType_RELU:
      param->is_relu_ = true;
      param->is_relu6_ = false;
      break;
    case schema::ActivationType_RELU6:
      param->is_relu_ = false;
      param->is_relu6_ = true;
      break;
    default:
      param->is_relu_ = false;
      param->is_relu6_ = false;
      break;
  }

  return reinterpret_cast<OpParameter *>(param);
}
#endif

kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *opParameter, const lite::Context *ctx,
@@ -28,7 +28,7 @@ class ConvolutionGradInputCPUKernel : public LiteKernel {
                                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                      const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~ConvolutionGradInputCPUKernel() override { delete workspace; }
  ~ConvolutionGradInputCPUKernel() override { delete [] workspace; }

  int Init() override;
  int ReSize() override;

@@ -37,6 +37,9 @@ class ConvolutionGradInputCPUKernel : public LiteKernel {
 private:
  float *workspace;
};

// OpParameter *PopulateConvolutionGradInputParameter(const lite::Primitive *primitive);

}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_CONVOLUTION_GRAD_INPUT_H
@@ -0,0 +1,73 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include "src/runtime/kernel/arm/fp32_grad/depend.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Depend;

namespace mindspore::kernel {

int DependCPUKernel::Init() {
  return RET_OK;
}

int DependCPUKernel::ReSize() { return 0; }

int DependCPUKernel::Run() {
#if 0
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }
  auto in = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data());

  memcpy(out, in, in_tensors_.at(0)->Size());
#endif
  return RET_OK;
}

kernel::LiteKernel *CpuDependFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const lite::Context *ctx,
                                               const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_Depend);
  auto *kernel =
    new (std::nothrow) DependCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  MS_ASSERT(kernel != nullptr);

  auto ret = kernel->Init();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Depend, CpuDependFp32KernelCreator)
}  // namespace mindspore::kernel
@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DEPEND_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DEPEND_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

#include "nnacl/fp32/arithmetic.h"

namespace mindspore::kernel {
class DependCPUKernel : public LiteKernel {
 public:
  explicit DependCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                           const lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
    param = parameter;
  }
  ~DependCPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  OpParameter *param;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DEPEND_H_
@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_MAKE_TUPLE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_MAKE_TUPLE_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

#include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h"

namespace mindspore::kernel {
class MakeTupleCPUKernel : public LiteKernel {
 public:
  explicit MakeTupleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                              const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                              const lite::Primitive *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
    param = parameter;
  }
  ~MakeTupleCPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  OpParameter *param;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_MAKE_TUPLE_H_
@@ -1,87 +0,0 @@

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/arm/fp32_grad/opt_momentum.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_OptMomentum;

namespace mindspore::kernel {

int OptMomentumCPUKernel::ReSize() { return 0; }

int OptMomentumCPUKernel::Run() {
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
    return prepare_ret;
  }
  if (inputs_.size() != 5 || !outputs_.empty()) {
    MS_LOG(ERROR) << "OptMomentumCPUKernel error input output size!";
    return RET_ERROR;
  }

  if (inputs_[0]->ElementsNum() != inputs_[1]->ElementsNum() ||
      inputs_[0]->ElementsNum() != inputs_[3]->ElementsNum()) {
    MS_LOG(ERROR) << "error input data size!";
    return RET_ERROR;
  }
  auto weight = reinterpret_cast<float *>(inputs_[0]->Data());
  auto accumulate = reinterpret_cast<float *>(inputs_[1]->Data());
  float learning_rate = reinterpret_cast<float *>(inputs_[2]->Data())[0];
  auto gradient = reinterpret_cast<float *>(inputs_[3]->Data());
  float moment = reinterpret_cast<float *>(inputs_[4]->Data())[0];
  size_t elem_num = inputs_[0]->ElementsNum();
  for (size_t i = 0; i < elem_num; ++i) {
    accumulate[i] = accumulate[i] * moment + gradient[i];
    weight[i] -= accumulate[i] * learning_rate;
  }
  return RET_OK;
}

int OptMomentumCPUKernel::Init() { return 0; }

kernel::LiteKernel *CpuOptMomentumFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                    const std::vector<lite::tensor::Tensor *> &outputs,
                                                    OpParameter *opParameter, const lite::Context *ctx,
                                                    const kernel::KernelKey &desc,
                                                    const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(desc.type == schema::PrimitiveType_OptMomentum);
  auto *kernel = new (std::nothrow) OptMomentumCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new OptMomentumCPUKernel fail!";
    return nullptr;
  }

  auto ret = kernel->Init();
  if (0 != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_OptMomentum, CpuOptMomentumFp32KernelCreator)
}  // namespace mindspore::kernel
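For reference, the deleted OptMomentumCPUKernel above implemented plain SGD with momentum over five inputs (weight, accumulate, learning_rate, gradient, moment). A minimal standalone sketch of the same update rule (the function name is illustrative, not a MindSpore API):

// v <- m * v + g;  w <- w - lr * v   (mirrors the removed kernel's inner loop)
void MomentumStep(float *weight, float *accumulate, const float *gradient,
                  float learning_rate, float moment, size_t elem_num) {
  for (size_t i = 0; i < elem_num; ++i) {
    accumulate[i] = accumulate[i] * moment + gradient[i];
    weight[i] -= accumulate[i] * learning_rate;
  }
}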
@@ -20,6 +20,7 @@
#include "nnacl/fp32/pooling.h"
#include "nnacl/fp32_grad/pooling_grad.h"
#include "include/errorcode.h"
// #include "src/train/ops/train_ops.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;

@@ -29,9 +30,15 @@ using mindspore::schema::PrimitiveType_PoolingGrad;

namespace mindspore::kernel {
int PoolingGradCPUKernel::Init() {
  PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(opParameter);
  PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(op_parameter_);

  auto in_shape = inputs_.at(0)->shape();
  auto in_shape = in_tensors_.at(0)->shape();
  auto out_shape = in_tensors_.at(1)->shape();

  if (pool_param->pool_mode_ == PoolMode_AvgPool) {
    in_shape = in_tensors_.at(1)->shape();
    out_shape = in_tensors_.at(0)->shape();
  }
  int input_h = in_shape.at(1);
  int input_w = in_shape.at(2);

@@ -40,25 +47,39 @@ int PoolingGradCPUKernel::Init() {
    pool_param->window_h_ = input_h;
  }

  // Emir -- here I assume we get the output shape in the output tensor
  auto *out_tensor = outputs_.front();
  auto out_shape = out_tensor->shape();
  pool_param->input_h_ = in_shape[kNHWC_H];
  pool_param->input_w_ = in_shape[kNHWC_W];
  pool_param->input_batch_ = in_shape[kNHWC_N];
  pool_param->input_channel_ = in_shape[kNHWC_C];

  // Emir -- here I assume we get the output shape in the output tensor
  // auto *out_tensor = out_tensors_.front();
  // auto out_shape = in_tensors_.at(1)->shape();

  pool_param->output_h_ = out_shape[kNHWC_H];
  pool_param->output_w_ = out_shape[kNHWC_W];
  pool_param->output_batch_ = out_shape[kNHWC_N];
  pool_param->output_channel_ = out_shape[kNHWC_C];

  out_tensor->set_shape(out_shape);
  out_tensor->set_data_type(inputs_.at(0)->data_type());
  return RET_OK;
}

int PoolingGradCPUKernel::ReSize() { return RET_OK; }

int PoolingGradCPUKernel::Run() {
  PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(opParameter);
  auto input_ptr = reinterpret_cast<float *>(inputs_.at(0)->Data());
  auto output_ptr = reinterpret_cast<float *>(outputs_.at(0)->Data());
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
    return prepare_ret;
  }
  PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(op_parameter_);
  auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->Data());

  if (pool_param->pool_mode_ == PoolMode_MaxPool) {
    auto ind = reinterpret_cast<int *>(inputs_.at(1)->Data());
    MaxPoolingGrad(input_ptr, ind, output_ptr, pool_param);
    auto dx_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
    auto dy_ptr = reinterpret_cast<float *>(in_tensors_.at(2)->Data());
    MaxPoolingGrad(input_ptr, dx_ptr, dy_ptr, output_ptr, pool_param);
  } else {
    AvgPoolingGrad(input_ptr, output_ptr, pool_param);
  }
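The signature change above hands MaxPoolingGrad the saved forward tensors (dx and dy) instead of a precomputed index map, while AvgPoolingGrad needs no indices because the average-pool gradient is spread uniformly over each window. A naive sketch of that uniform spreading, assuming stride == window, no padding, and a single channel (the real nnacl kernel handles NHWC layout, strides, and padding):

// dx must be zero-initialized; each dy element contributes 1/(k*k) to its k x k source window.
void NaiveAvgPoolGrad(const float *dy, float *dx, int out_h, int out_w, int k) {
  int in_w = out_w * k;
  for (int oh = 0; oh < out_h; ++oh) {
    for (int ow = 0; ow < out_w; ++ow) {
      float g = dy[oh * out_w + ow] / (k * k);
      for (int ih = oh * k; ih < (oh + 1) * k; ++ih) {
        for (int iw = ow * k; iw < (ow + 1) * k; ++iw) {
          dx[ih * in_w + iw] += g;
        }
      }
    }
  }
}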
@@ -43,6 +43,7 @@ class PoolingGradCPUKernel : public LiteKernel {
 private:
  uint8_t data_shape_{0};
};

}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_POOLING_GRAD_H_
@@ -31,10 +31,10 @@ int PowerGradCPUKernel::Init() { return RET_OK; }
int PowerGradCPUKernel::ReSize() { return RET_OK; }

int PowerGradCPUKernel::Run() {
  auto dy_addr = reinterpret_cast<float *>(inputs_.at(0)->Data());
  auto x_addr = reinterpret_cast<float *>(inputs_.at(1)->Data());
  auto dx_addr = reinterpret_cast<float *>(outputs_.at(0)->Data());
  auto size = inputs_.at(0)->ElementsNum();
  auto dy_addr = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto x_addr = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
  auto dx_addr = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
  auto size = in_tensors_.at(0)->ElementsNum();

  float exp = power_ - 1;
  Power(x_addr, &exp, dx_addr, size, scale_, shift_, true);

@@ -47,6 +47,7 @@ int PowerGradCPUKernel::Run() {
  return RET_OK;
}

kernel::LiteKernel *CpuPowerGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
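The exp = power_ - 1 above is the first half of the ordinary power rule; assuming the part of Run() elided from this hunk multiplies the Power() result by power_, scale_ and the incoming dy (the standard form of this derivative), the element-wise rule being computed is, as a scalar sketch:

#include <math.h>
// dL/dx = dy * p * scale * (scale * x + shift)^(p - 1)
float PowerGradScalar(float dy, float x, float p, float scale, float shift) {
  return dy * p * scale * powf(scale * x + shift, p - 1.0f);
}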
@@ -45,6 +45,7 @@ class PowerGradCPUKernel : public LiteKernel {
  float scale_;
  float shift_;
};

}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_POWER_GRAD_H_
@@ -14,6 +14,7 @@
 * limitations under the License.
 */

#include <math.h>
#include "src/kernel_registry.h"
#include "nnacl/softmax_parameter.h"
#include "nnacl/fp32/softmax.h"

@@ -46,9 +47,10 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int
  output[0] = total_loss / param->batch_size_;
}

void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses,
void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads,
                                                                   float *output) const {
  size_t row_start = 0;
  float total_loss = 0;
  for (int i = 0; i < param->batch_size_; ++i) {
    if (labels[i] < 0) {
      MS_LOG(EXCEPTION) << "label value must >= 0";

@@ -56,78 +58,88 @@ void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *la
    size_t label = labels[i];
    if (label > param->number_of_classes_) {
      MS_LOG(EXCEPTION) << "error label input!";
    }
    for (size_t j = 0; j < param->number_of_classes_; ++j) {
      size_t index = row_start + j;
      if (j == label) {
        output[index] = (losses[index] - 1) / param->batch_size_;
      } else {
        output[index] = losses[index] / param->batch_size_;
    } else {
      total_loss -= logf(losses[i * param->number_of_classes_ + label]);
      for (size_t j = 0; j < param->number_of_classes_; ++j) {
        size_t index = row_start + j;
        if (j == label) {
          grads[index] = (losses[index] - 1) / param->batch_size_;
        } else {
          grads[index] = losses[index] / param->batch_size_;
        }
      }
    }
    row_start += param->number_of_classes_;
  }
  output[0] = total_loss / param->batch_size_;
}

int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
  auto ins = reinterpret_cast<float *>(inputs_.at(0)->Data());
  auto labels = reinterpret_cast<int *>(inputs_.at(1)->Data());
  auto out = reinterpret_cast<float *>(outputs_.at(1)->Data());
  float *grads = NULL;
  if (is_train()) {  // outputs_.size() > 1)
    grads = reinterpret_cast<float *>(outputs_.at(0)->Data());
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return ret;
  }
  size_t data_size = inputs_.at(0)->ElementsNum();

  auto ins = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto labels = reinterpret_cast<int *>(in_tensors_.at(1)->Data());
  float *out = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
  float *grads = NULL;
  if (is_train() && out_tensors_.size() > 1) {
    grads = reinterpret_cast<float *>(out_tensors_.at(1)->Data());
  }
  size_t data_size = in_tensors_.at(0)->ElementsNum();
  float *losses = new (std::nothrow) float[data_size];
  if (losses == nullptr) {
    MS_LOG(ERROR) << "losses is null";
    return nullptr;
    return RET_ERROR;
  }

  std::fill(losses, losses + data_size, 0);

  MS_ASSERT(out != nullptr);
  MS_ASSERT(labels != nullptr);
  MS_ASSERT(ins != nullptr);

  SoftmaxParameter sm_params;
  sm_params.n_dim_ = param->n_dim_;
  sm_params.element_size_ = data_size;
  sm_params.axis_ = 0;
  for (int i = 0; i < 4; i++)  // softmax has only 4 params in shape
    sm_params.input_shape_[i] = param->input_shape_[i];
  float sum_data[sm_params.input_shape_[sm_params.axis_]] = {0};
  std::fill(sum_data, sum_data + sm_params.input_shape_[sm_params.axis_], 0);
  Softmax(ins, losses, sum_data, &sm_params);

  std::fill(losses_, losses_ + data_size, 0);
  std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0);
  Softmax(ins, losses_, sum_data_, &sm_params_);
  if (is_train()) {
    GradPostExecute(labels, losses, grads);
  } else {
    ForwardPostExecute(labels, losses, out);
    GradPostExecute(labels, losses_, grads, out);
  } else if (out != nullptr) {
    ForwardPostExecute(labels, losses_, out);
  }
  return RET_OK;
}

int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();
    return RET_OK;
  }
  auto dims = inputs_[0]->shape();
  // if (context_ && context_->infer_shape_interrupt_ && !context_->running_) {
  //   set_need_reinit();
  //   return RET_OK;
  // }
  auto dims = in_tensors_[0]->shape();
  param->n_dim_ = 2;
  param->number_of_classes_ = dims[1];
  param->batch_size_ = dims[0];
  for (unsigned int i = 0; i < dims.size(); i++) param->input_shape_[i] = dims[i];
  if (2 != this->inputs_.size()) {
  if (2 != this->in_tensors_.size()) {
    MS_LOG(ERROR) << "softmax entropy loss should have two inputs";
    return RET_ERROR;
  }
  auto *in0 = inputs_.front();
  auto *in0 = in_tensors_.front();
  if (in0 == nullptr) {
    MS_LOG(ERROR) << "softmax entropy loss in0 have no data";
    return RET_ERROR;
  }

  size_t data_size = in_tensors_.at(0)->ElementsNum();
  losses_ = new (std::nothrow) float[data_size];
  sum_data_ = new (std::nothrow) float[dims[0]];
  MS_ASSERT(losses_ != nullptr);
  MS_ASSERT(sum_data_ != nullptr);

  sm_params_.n_dim_ = 2;
  sm_params_.element_size_ = data_size;
  sm_params_.axis_ = 1;
  for (int i = 0; i < dims.size(); i++) sm_params_.input_shape_[i] = dims[i];

  return RET_OK;
}
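The training branch above follows the standard softmax-cross-entropy identity: with p = softmax(logits) already stored in losses_, the per-class gradient of the mean loss is (p - onehot(label)) / batch_size. Isolated as a per-row sketch:

// grads[j] = (p[j] - [j == label]) / batch_size, for one sample's softmax row p.
void SoftmaxCrossEntropyGradRow(const float *p, int label, int num_classes,
                                int batch_size, float *grads) {
  for (int j = 0; j < num_classes; ++j) {
    float indicator = (j == label) ? 1.0f : 0.0f;
    grads[j] = (p[j] - indicator) / batch_size;
  }
}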
@@ -14,31 +14,32 @@
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_

#include <vector>
#include "src/lite_kernel.h"
#include "src/train/loss_kernel.h"
#include "ir/anf.h"
#include "nnacl/fp32_grad/softmax_grad.h"
#include "nnacl/fp32/arithmetic.h"
#include "nnacl/softmax_parameter.h"

namespace mindspore::kernel {

class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LiteKernel {
class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
 public:
  explicit SparseSoftmaxCrossEntropyWithLogitsCPUKernel(OpParameter *parameter,
                                                        const std::vector<lite::tensor::Tensor *> &inputs,
                                                        const std::vector<lite::tensor::Tensor *> &outputs,
                                                        const lite::Context *ctx,
                                                        const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
      : LossKernel(parameter, inputs, outputs, ctx, primitive) {
    param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
  }
  ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override = default;
  ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override { delete[] losses_; delete[] sum_data_; }

  void ForwardPostExecute(const int *labels, const float *losses, float *output) const;
  void GradPostExecute(const int *labels, const float *losses, float *output) const;
  void GradPostExecute(const int *labels, const float *losses, float *grads, float *output) const;

  int Init() override;
  int ReSize() override;

@@ -46,7 +47,11 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LiteKernel {

 private:
  SoftmaxCrossEntropyParameter *param;
  SoftmaxParameter sm_params_;
  float *losses_ = nullptr;
  float *sum_data_ = nullptr;
};

}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_H_
@@ -0,0 +1,72 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include "src/runtime/kernel/arm/fp32_grad/tuple_getitem.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_TupleGetItem;

namespace mindspore::kernel {

int TupleGetItemCPUKernel::Init() {
  return RET_OK;
}

int TupleGetItemCPUKernel::ReSize() { return 0; }

int TupleGetItemCPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }
  auto in = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
  auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data());

  memcpy(out, in, in_tensors_.at(0)->Size());

  return RET_OK;
}

kernel::LiteKernel *CpuTupleGetItemFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                     const std::vector<lite::tensor::Tensor *> &outputs,
                                                     OpParameter *opParameter, const lite::Context *ctx,
                                                     const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) {
  MS_ASSERT(opParameter != nullptr);
  MS_ASSERT(desc.type == schema::PrimitiveType_TupleGetItem);
  auto *kernel =
    new (std::nothrow) TupleGetItemCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  MS_ASSERT(kernel != nullptr);

  auto ret = kernel->Init();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TupleGetItem, CpuTupleGetItemFp32KernelCreator)
}  // namespace mindspore::kernel
@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_TUPLE_GETITEM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_TUPLE_GETITEM_H_

#include <vector>
#include "src/lite_kernel.h"
#include "ir/anf.h"

#include "nnacl/fp32/arithmetic.h"

namespace mindspore::kernel {
class TupleGetItemCPUKernel : public LiteKernel {
 public:
  explicit TupleGetItemCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                                 const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                 const lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
    param = parameter;
  }
  ~TupleGetItemCPUKernel() override = default;

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  OpParameter *param;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_TUPLE_GETITEM_H_
@@ -94,8 +94,10 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<tensor::Tensor *
      inputs.emplace_back(tensors->at(size_t(inIndexes->GetAs<uint32_t>(j))));
    }
    auto outIndexes = cNode->outputIndex();
    for (size_t j = 0; j < outIndexes->size(); j++) {
      outputs.emplace_back(tensors->at(size_t(outIndexes->GetAs<uint32_t>(j))));
    if (outIndexes != nullptr) {
      for (size_t j = 0; j < outIndexes->size(); j++) {
        outputs.emplace_back(tensors->at(size_t(outIndexes->GetAs<uint32_t>(j))));
      }
    }
    auto *primitive = model->GetOp(cNode->name()->str());
    if (primitive == nullptr) {
@@ -0,0 +1,34 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_TRAIN_LOSS_KERNEL_H_
#define MINDSPORE_LITE_SRC_TRAIN_LOSS_KERNEL_H_
#include <vector>
#include "src/lite_kernel.h"
namespace mindspore::kernel {

class LossKernel : public LiteKernel {
 public:
  LossKernel() = default;
  explicit LossKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs,
                      const lite::Context *ctx,
                      const lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~LossKernel() = default;
};

}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_TRAIN_LOSS_KERNEL_H_
@@ -0,0 +1,250 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/populate_parameter.h"
#include "src/train/train_populate_parameter.h"
#include "src/ops/pooling_grad.h"
#include "nnacl/pooling_parameter.h"
#include "src/ops/softmax_cross_entropy.h"
#include "nnacl/fp32_grad/softmax_grad.h"
#include "src/ops/activation_grad.h"
#include "nnacl/fp32/activation.h"
#include "src/ops/conv2d_grad_filter.h"
#include "src/ops/conv2d_grad_input.h"
#include "nnacl/conv_parameter.h"
#include "src/ops/power_grad.h"
#include "nnacl/power_parameter.h"

namespace mindspore::kernel {

OpParameter *DefaultPopulateParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  OpParameter *param = new (std::nothrow) OpParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for primitive failed.";
    return nullptr;
  }

  param->type_ = primitive->Type();
  return param;
}

OpParameter *PopulateSoftmaxCrossEntropyParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }
  SoftmaxCrossEntropyParameter *sce_param = new (std::nothrow) SoftmaxCrossEntropyParameter();
  if (sce_param == nullptr) {
    MS_LOG(ERROR) << "new SoftmaxCrossEntropyParameter failed.";
    return nullptr;
  }
  sce_param->op_parameter_.type_ = primitive->Type();
  return reinterpret_cast<OpParameter *>(sce_param);
}

OpParameter *PopulatePoolingGradParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }
  PoolingParameter *pooling_param = new (std::nothrow) PoolingParameter();
  if (pooling_param == nullptr) {
    MS_LOG(ERROR) << "new PoolingParameter failed.";
    return nullptr;
  }
  pooling_param->op_parameter_.type_ = primitive->Type();
  auto pooling_primitive =
    reinterpret_cast<mindspore::lite::PoolingGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));

  pooling_param->global_ = pooling_primitive->GetGlobal();
  pooling_param->window_w_ = pooling_primitive->GetWindowW();
  pooling_param->window_h_ = pooling_primitive->GetWindowH();

  pooling_param->pad_u_ = pooling_primitive->GetPadUp();
  pooling_param->pad_d_ = pooling_primitive->GetPadDown();
  pooling_param->pad_l_ = pooling_primitive->GetPadLeft();
  pooling_param->pad_r_ = pooling_primitive->GetPadRight();
  pooling_param->stride_w_ = pooling_primitive->GetStrideW();
  pooling_param->stride_h_ = pooling_primitive->GetStrideH();

  pooling_param->pool_mode_ = PoolMode_No;
  pooling_param->round_mode_ = RoundMode_No;

  switch (pooling_primitive->GetPoolingMode()) {
    case schema::PoolMode_MAX_POOLING:
      pooling_param->pool_mode_ = PoolMode_MaxPool;
      break;
    case schema::PoolMode_MEAN_POOLING:
      pooling_param->pool_mode_ = PoolMode_AvgPool;
      break;
    default:
      break;
  }

  switch (pooling_primitive->GetRoundMode()) {
    case schema::RoundMode_FLOOR:
      pooling_param->round_mode_ = RoundMode_Floor;
      break;
    case schema::RoundMode_CEIL:
      pooling_param->round_mode_ = RoundMode_Ceil;
      break;
    default:
      break;
  }
  return reinterpret_cast<OpParameter *>(pooling_param);
}

OpParameter *PopulateActivationGradParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  ActivationParameter *act_param = new (std::nothrow) ActivationParameter();
  if (act_param == nullptr) {
    MS_LOG(ERROR) << "new ActivationParameter failed.";
    return nullptr;
  }
  act_param->op_parameter_.type_ = primitive->Type();
  auto activation =
    reinterpret_cast<mindspore::lite::ActivationGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
  act_param->type_ = static_cast<int>(activation->GetType());
  act_param->alpha_ = activation->GetAlpha();
  return reinterpret_cast<OpParameter *>(act_param);
}

OpParameter *PopulateConvolutionGradFilterParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  ConvParameter *param = new (std::nothrow) ConvParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for conv grad filter failed.";
    return nullptr;
  }
  param->op_parameter_.type_ = primitive->Type();

  auto convg_primitive =
    reinterpret_cast<mindspore::lite::Conv2DGradFilter *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
  param->kernel_h_ = convg_primitive->GetKernelH();
  param->kernel_w_ = convg_primitive->GetKernelW();
  param->stride_h_ = convg_primitive->GetStrideH();
  param->stride_w_ = convg_primitive->GetStrideW();
  param->dilation_h_ = convg_primitive->GetDilateH();
  param->dilation_w_ = convg_primitive->GetDilateW();
  param->pad_u_ = convg_primitive->GetPadUp();
  param->pad_d_ = convg_primitive->GetPadDown();
  param->pad_l_ = convg_primitive->GetPadLeft();
  param->pad_r_ = convg_primitive->GetPadRight();
  param->group_ = convg_primitive->GetGroup();
  param->act_type_ = ActType_No;
  switch (convg_primitive->GetActivationType()) {
    case schema::ActivationType_RELU:
      param->act_type_ = ActType_Relu;
      break;
    case schema::ActivationType_RELU6:
      param->act_type_ = ActType_Relu6;
      break;
    default:
      break;
  }

  return reinterpret_cast<OpParameter *>(param);
}

OpParameter *PopulateConvolutionGradInputParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  ConvParameter *param = new (std::nothrow) ConvParameter();
  if (param == nullptr) {
    MS_LOG(ERROR) << "new Param for conv grad input failed.";
    return nullptr;
  }
  param->op_parameter_.type_ = primitive->Type();

  auto convg_primitive =
    reinterpret_cast<mindspore::lite::Conv2DGradInput *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
  param->kernel_h_ = convg_primitive->GetKernelH();
  param->kernel_w_ = convg_primitive->GetKernelW();
  param->stride_h_ = convg_primitive->GetStrideH();
  param->stride_w_ = convg_primitive->GetStrideW();
  param->dilation_h_ = convg_primitive->GetDilateH();
  param->dilation_w_ = convg_primitive->GetDilateW();
  param->pad_u_ = convg_primitive->GetPadUp();
  param->pad_d_ = convg_primitive->GetPadDown();
  param->pad_l_ = convg_primitive->GetPadLeft();
  param->pad_r_ = convg_primitive->GetPadRight();
  param->group_ = convg_primitive->GetGroup();
  param->act_type_ = ActType_No;
  switch (convg_primitive->GetActivationType()) {
    case schema::ActivationType_RELU:
      param->act_type_ = ActType_Relu;
      break;
    case schema::ActivationType_RELU6:
      param->act_type_ = ActType_Relu6;
      break;
    default:
      break;
  }

  return reinterpret_cast<OpParameter *>(param);
}

OpParameter *PopulatePowerGradParameter(const mindspore::lite::PrimitiveC *primitive) {
  if (primitive == nullptr) {
    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
    return nullptr;
  }

  PowerParameter *power_param = new (std::nothrow) PowerParameter();
  if (power_param == nullptr) {
    MS_LOG(ERROR) << "new PowerParameter failed.";
    return nullptr;
  }
  power_param->op_parameter_.type_ = primitive->Type();
  auto power = reinterpret_cast<mindspore::lite::PowerGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
  power_param->power_ = power->GetPower();
  power_param->scale_ = power->GetScale();
  power_param->shift_ = power->GetShift();
  return reinterpret_cast<OpParameter *>(power_param);
}

void PopulateTrainParameters() {
  auto ppr = PopulateParameterRegistry::GetInstance();
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, DefaultPopulateParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateArithmetic);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_SoftmaxCrossEntropy, PopulateSoftmaxCrossEntropyParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_ActivationGrad, PopulateActivationGradParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_TupleGetItem, DefaultPopulateParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_Depend, DefaultPopulateParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_BNGrad, DefaultPopulateParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_Conv2DGradFilter, PopulateConvolutionGradFilterParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_Conv2DGradInput, PopulateConvolutionGradInputParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_PoolingGrad, PopulatePoolingGradParameter);
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_PowerGrad, PopulatePowerGradParameter);
}

}  // namespace mindspore::kernel
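PopulateTrainParameters() makes the training build self-registering: each grad primitive type is mapped once to a populate function that turns the flatbuffer primitive into an OpParameter. Supporting another training op would follow the same shape (PrimitiveType_Foo and PopulateFooParameter are placeholders here, not real symbols):

void RegisterMyTrainOp() {
  auto ppr = PopulateParameterRegistry::GetInstance();
  // Hypothetical op; a real one needs a matching Populate...Parameter like those above.
  ppr->AddPopulateParameterFunc(schema::PrimitiveType_Foo, PopulateFooParameter);
}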
@@ -0,0 +1,28 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_TRAIN_TRAIN_POPULATE_PARAMETER_H_
#define MINDSPORE_LITE_SRC_TRAIN_TRAIN_POPULATE_PARAMETER_H_

#include "src/ops/primitive_c.h"

namespace mindspore::kernel {

void PopulateTrainParameters();

}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_TRAIN_TRAIN_POPULATE_PARAMETER_H_
@ -0,0 +1,136 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "include/train_session.h"
|
||||
#include <algorithm>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/context.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "mindspore/lite/src/ir/tensor.h"
|
||||
#include "src/train/loss_kernel.h"
|
||||
#include "src/train/train_populate_parameter.h"
|
||||
#include "src/runtime/runtime_api.h"
|
||||
#include "src/executor.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/runtime/kernel/arm/fp32_grad/convolution.h"
|
||||
|
||||
namespace mindspore::session {
|
||||
|
||||
TrainSession::TrainSession() { kernel::PopulateTrainParameters(); }
|
||||
|
||||
void TrainSession::ReplaceOps() {
|
||||
mindspore::lite::KernelRegistrar tmp(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32,
|
||||
mindspore::schema::PrimitiveType_Conv2D,
|
||||
mindspore::kernel::CpuConvTrainFp32KernelCreator);
|
||||
}
|
||||
|
||||
int TrainSession::CompileGraph(lite::Model *model) {
|
||||
model_ = model;
|
||||
ReplaceOps();
|
||||
return LiteSession::CompileGraph(model);
|
||||
}
|
||||
|
||||
void* TrainSession::ExportToBuf(void* buf, size_t *len) const {
|
||||
// auto train_model_impl = (dynamic_cast<lite::train::TrainModelImpl*>(model_->model_impl()));
|
||||
// return train_model_impl->ExportToBuf(buf, len);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
int TrainSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) {
|
||||
auto ms_output_tensors = GetOutputs();
|
||||
this->outputs_.clear();
|
||||
for (auto ms_tensors : ms_output_tensors)
|
||||
for (auto ms_tensor : ms_tensors.second)
|
||||
this->outputs_.push_back((dynamic_cast<lite::tensor::LiteTensor*>(ms_tensor))->tensor());
|
||||
if (train_mode_)
|
||||
return LiteSession::RunGraph(before, after);
|
||||
|
||||
// object is expected to run only inference part of graph
|
||||
// prepare a lit of kernels till the loss function -- temporary solution
|
||||
std::vector<kernel::LiteKernel *> infference_kernels;
|
||||
for (auto kernel : this->kernels_) {
|
||||
if (dynamic_cast<const kernel::LossKernel*>(kernel) != nullptr)
|
||||
break;
|
||||
infference_kernels.push_back(kernel);
|
||||
}
|
||||
|
||||
MS_EXCEPTION_IF_NULL(this->context_);
|
||||
// TODO(Emir)
|
||||
// SetMaxWokerNum(context_->thread_num_);
|
||||
// context_->running_ = true;
|
||||
lite::Executor executor;
|
||||
if (before == nullptr && after == nullptr) {
|
||||
return executor.Run(this->inputs_, this->outputs_, infference_kernels, this->context_->allocator.get());
|
||||
} else {
|
||||
return executor.Run(this->inputs_, this->outputs_, infference_kernels, this->context_->allocator.get(),
|
||||
before, after);
|
||||
}
|
||||
}
|
||||
|
||||
void TrainSession::train() {
|
||||
for (auto *kernel : kernels_) {
|
||||
MS_ASSERT(nullptr != kernel);
|
||||
kernel->train();
|
||||
}
|
||||
train_mode_ = true;
|
||||
ext_output_map_.clear();
|
||||
for (auto kernel : this->kernels_) {
|
||||
if (dynamic_cast<const kernel::LossKernel*>(kernel) != nullptr) {
|
||||
auto *ms_tensor = new lite::tensor::LiteTensor(kernel->out_tensors().at(0));
|
||||
ext_output_map_[kernel->name()].emplace_back(ms_tensor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TrainSession::eval() {
|
||||
for (auto *kernel : kernels_) {
|
||||
MS_ASSERT(nullptr != kernel);
|
||||
kernel->eval();
|
||||
}
|
||||
train_mode_ = false;
|
||||
kernel::LiteKernel* last_kernel = nullptr;
|
||||
// We should get in_kernels and then get all last kernels
|
||||
ext_output_map_ = output_node_map_;
|
||||
for (auto kernel : this->kernels_) {
|
||||
if ((dynamic_cast<const kernel::LossKernel*>(kernel) != nullptr) &&
|
||||
(last_kernel != nullptr)) {
|
||||
auto *ms_tensor = new lite::tensor::LiteTensor(last_kernel->out_tensors().at(0));
|
||||
ext_output_map_[last_kernel->name()].emplace_back(ms_tensor);
|
||||
}
|
||||
last_kernel = kernel;
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> TrainSession::GetOutputs() const {
|
||||
return ext_output_map_;
|
||||
}
|
||||
std::vector<tensor::MSTensor *> TrainSession::GetOutputsByName(const std::string &name) const {
|
||||
auto ret_vect = LiteSession::GetOutputsByNodeName(name); // TODO(emir): GetOutputsByTensorName?
|
||||
if (ret_vect.size() > 0)
|
||||
return ret_vect;
|
||||
auto ret = ext_output_map_.find(name);
|
||||
if (ret == ext_output_map_.end()) {
|
||||
MS_LOG(WARNING) << "Node " << name << " is not an output node";
|
||||
std::vector<mindspore::tensor::MSTensor *> empty_ret;
|
||||
return empty_ret;
|
||||
}
|
||||
return ret->second;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace mindspore::session
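For orientation, a minimal sketch of driving this session end to end (patterned on the tuning_layer and efficient_net tests later in this commit; the model buffer and its loading are assumed, so this is illustrative rather than part of the diff):

// Sketch only: (model_buf, size) is an already-loaded model flatbuffer.
#include "include/train_session.h"
#include "include/context.h"
#include "include/errorcode.h"

int TrainingRoundTrip(const char *model_buf, size_t size) {
  auto model = mindspore::lite::Model::Import(model_buf, size);
  if (model == nullptr) return -1;
  auto context = new mindspore::lite::Context;
  context->device_ctx_.type = mindspore::lite::DT_CPU;
  context->thread_num_ = 1;
  auto session = new mindspore::session::TrainSession();
  session->Init(context);
  if (session->CompileGraph(model) != mindspore::lite::RET_OK) return -1;
  session->train();     // loss kernels become the externally visible outputs
  session->RunGraph();  // runs the whole graph, including gradient kernels
  session->eval();      // pre-loss outputs exposed; RunGraph stops at the loss kernel
  session->RunGraph();
  delete session;
  delete context;
  return 0;
}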
@@ -259,6 +259,10 @@ endif()
if (SUPPORT_TRAIN)
    set(TEST_LITE_SRC
        ${TEST_LITE_SRC}
        # ${LITE_DIR}/src/train/ops/train_ops.cc
        ${LITE_DIR}/src/train/train_populate_parameter.cc
        ${LITE_DIR}/src/train/train_session.cc
        ${LITE_DIR}/src/lite_session.cc
        # ${SRC_DIR}/common/trans.cc
        # ${SRC_DIR}/common/lite/trans_extends.cc
        # ${SRC_DIR}/kernel/kernel_build_info.cc
@@ -25,9 +25,10 @@
#include "mindspore/lite/src/ir/tensor.h"
#include "mindspore/lite/src/lite_kernel.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.h"
#include "nnacl/fp32_grad/activation_grad.h"

namespace mindspore {
class TestActGradFp32 : public mindspore::CommonTest {
 public:
  TestActGradFp32() {}
};
@@ -41,13 +42,14 @@ TEST_F(TestActGradFp32, ReluGradFp32) {
  size_t input_size;
  std::string input_path = "./test_data/activationGrad/relu_y_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  EXPECT_EQ(input_size, output_data_size * sizeof(float));
  std::string yt_path = "./test_data/activationGrad/relu_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));

  EXPECT_EQ(input_size, output_data_size * sizeof(float));
  auto output_data = new float[output_data_size];
  // warm up loop
  for (int i = 0; i < 3; i++) {
    ReluGrad(yt_data, input_data, 50, output_data);
    ReluGrad(yt_data, input_data, output_data_size, output_data);
  }

  int loop_count = 100;

@@ -72,9 +74,9 @@ TEST_F(TestActGradFp32, ReluGradFp32) {
  EXPECT_EQ(res, 0);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;

  MS_LOG(INFO) << "ReluGradFp32 passed";
}

@@ -118,9 +120,9 @@ TEST_F(TestActGradFp32, Relu6GradFp32) {
  EXPECT_EQ(res, 0);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;

  MS_LOG(INFO) << "Relu6GradFp32 passed";
}

@@ -164,9 +166,9 @@ TEST_F(TestActGradFp32, LReluGradFp32) {
  EXPECT_EQ(res, 0);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;

  MS_LOG(INFO) << "LReluGradFp32 passed";
}

@@ -211,9 +213,9 @@ TEST_F(TestActGradFp32, SigmoidGradFp32) {
  EXPECT_EQ(res, 0);
  // lite::CompareOutput(output_data, output_path);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;

  MS_LOG(INFO) << "SigmoidGradFp32 passed";
}

@@ -257,9 +259,9 @@ TEST_F(TestActGradFp32, tanhGradFp32) {
  EXPECT_EQ(res, 0);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;
  MS_LOG(INFO) << "TanhGradFp32 passed";
}

@@ -267,24 +269,25 @@ TEST_F(TestActGradFp32, hswishGradFp32) {
  // runtime part
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;
  size_t output_data_size = 50;
  const size_t output_data_size = 10;

  size_t input_size;
  std::string input_path = "./test_data/activationGrad/hswish_x_50.bin";
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
  EXPECT_EQ(input_size, output_data_size * sizeof(float));
  std::string yt_path = "./test_data/activationGrad/hswish_yt_50.bin";
  auto yt_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(yt_path.c_str(), &input_size));

  EXPECT_EQ(input_size, output_data_size * sizeof(float));
  auto output_data = new float[output_data_size];
  // warm up loop
  for (int i = 0; i < 3; i++) {
    HSwishGrad(yt_data, input_data, 50, output_data);
    HSwishGrad(yt_data, input_data, static_cast<int>(output_data_size), output_data);
  }

  int loop_count = 100;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    HSwishGrad(yt_data, input_data, 50, output_data);
    HSwishGrad(yt_data, input_data, output_data_size, output_data);
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;

@@ -292,7 +295,7 @@ TEST_F(TestActGradFp32, hswishGradFp32) {
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

  printf("==================output data=================\n");
  for (int i = 0; i < 20; i++) {
  for (int i = 0; i < std::min(output_data_size, 20UL); i++) {
    std::cout << output_data[i] << " ,";
  }
  std::cout << std::endl;

@@ -302,9 +305,9 @@ TEST_F(TestActGradFp32, hswishGradFp32) {
  EXPECT_EQ(res, 0);

  delete input_data;
  delete[] input_data;
  delete[] output_data;
  delete yt_data;
  delete[] yt_data;
  MS_LOG(INFO) << "hswishGradFp32 passed";
}
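A standalone sketch of how these nnacl activation-grad primitives are called (the signature matches the calls in the tests above; the buffer contents here are made up):

#include "nnacl/fp32_grad/activation_grad.h"

int ReluGradDemo() {
  const int length = 4;
  float y[] = {0.0f, 1.5f, 0.0f, 2.0f};   // forward ReLU outputs
  float dy[] = {0.1f, 0.2f, 0.3f, 0.4f};  // incoming gradient
  float dx[4] = {0};
  // dx[i] = dy[i] where y[i] > 0, else 0
  return ReluGrad(dy, y, length, dx);
}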
@@ -106,9 +106,14 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // delete all_tensors;
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGradFp32 passed";
}

@@ -137,9 +142,14 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_1_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];  // TODO: tensor data is unique pointer
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGrad2Fp32 passed";
}

@@ -169,8 +179,14 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_8_dx1_5_4_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestAddGrad3Fp32 passed";
}

@@ -200,8 +216,14 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_2_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestSubGradFp32 passed";
}

@@ -231,8 +253,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_3_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "TestSubGrad2Fp32 passed";
}

@@ -271,9 +297,13 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  // delete param;
  MS_LOG(INFO) << "TestMulGradFp32 passed";
}

@@ -302,9 +332,14 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_4_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad2Fp32 passed";
}

@@ -333,9 +368,14 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad3Fp32 passed";
}

@@ -364,9 +404,14 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_9_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestMulGrad4Fp32 passed";
}

@@ -395,9 +440,14 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
  std::string dx2_path = "./test_data/operators/arithmetic_fp32_5_dx2_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, dx2_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete kernel_obj;
  // delete param;
  MS_LOG(INFO) << "TestDivGradFp32 passed";
}

@@ -427,8 +477,14 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
  std::string output_path = "./test_data/operators/arithmetic_fp32_6_dx1_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestDivGrad2Fp32 passed";
}

@@ -457,9 +513,14 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
  std::string output_path = "./test_data/operators/arithmetic_fp32_10_dx2_5_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  // for (int i = 0; i < 5; i++) delete all_tensors[i];
  // delete param;
  delete kernel_obj;
  MS_LOG(INFO) << "TestDivGrad3Fp32 passed";
}

@@ -488,9 +549,12 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
  std::string output_path = "./test_data/operators/arithmetic_fp32_7_dx2_1_1_6.bin";
  EXPECT_EQ(0, lite::CompareRelativeOutput(output_ptr, output_path));

  for (int i = 0; i < 5; i++) delete all_tensors[i];
  delete param;
  for (auto tensor : all_tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());
    tensor->SetData(nullptr);
    delete tensor;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "Test3DDivGrad2Fp32 passed";
}
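The cleanup idiom repeated in every test above (the test owns the raw buffers, so the data pointer is detached before the tensor is destroyed) could be factored into a single helper; a sketch, with the helper name being hypothetical:

static void FreeTestTensors(std::vector<lite::tensor::Tensor *> *tensors) {
  for (auto tensor : *tensors) {
    delete[] reinterpret_cast<float *>(tensor->Data());  // buffer allocated by the test
    tensor->SetData(nullptr);  // detach so the tensor destructor does not free it again
    delete tensor;
  }
  tensors->clear();
}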
@@ -18,8 +18,8 @@
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h"
#include "mindspore/lite/src/kernel_registry.h"
#include "src/runtime/kernel/arm/fp32_grad/bias_grad.h"
#include "src/kernel_registry.h"

namespace mindspore {
@@ -40,9 +40,8 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) {
  dy_tensor.SetData(input_data);

  std::vector<lite::tensor::Tensor *> inputs = {&dy_tensor};

  auto output_data = new float[7];
  std::vector<int> dim_dw({7});
  std::vector<int> dim_dw = {7};
  lite::tensor::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
  dw_tensor.SetData(output_data);
  std::vector<lite::tensor::Tensor *> outputs = {&dw_tensor};

@@ -62,9 +61,12 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) {
  std::string output_path = "./test_data/operators/biasgradfp32_1_db_7.bin";
  lite::CompareOutput(output_data, output_path);

  // delete input_data;
  // delete[] output_data;
  delete bias_param;
  delete[] input_data;
  delete[] output_data;
  // delete bias_param;
  dy_tensor.SetData(nullptr);
  dw_tensor.SetData(nullptr);
  delete kernel_obj;
  MS_LOG(INFO) << "BiasGradFp32 passed";
}
@@ -0,0 +1,111 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <iostream>
#include <memory>
#include "utils/log_adapter.h"
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/common/file_utils_ext.h"
#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
#include "nnacl/fp32_grad/batch_norm.h"
#include "src/kernel_registry.h"

namespace mindspore {

class TestBNGradFp32 : public mindspore::CommonTest {
 public:
  TestBNGradFp32() {}
  lite::tensor::Tensor *CreateInTensor(std::string file_name, std::vector<int> dim);
};

lite::tensor::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<int> dim) {
  size_t input_size = 0;
  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_name.c_str(), &input_size));
  auto tensor = new lite::tensor::Tensor(TypeId::kNumberTypeFloat32, dim);
  tensor->SetData(input_data);
  EXPECT_EQ(input_size, tensor->Size());
  return tensor;
}

TEST_F(TestBNGradFp32, BNGradFp32) {
  // prepare stage
  auto bn_param = new BNGradParameter();
  bn_param->epsilon_ = 0.00001;
  bn_param->momentum_ = 0.1;
  const int batch = 2;
  const int channels = 3;
  const int height = 4;
  const int width = 5;

  auto dy_tensor = CreateInTensor("./test_data/bngrad/dy_2_4_5_3.bin", {batch, height, width, channels});
  auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels});
  auto scale_tensor = CreateInTensor("./test_data/bngrad/scale_3.bin", {1, 1, 1, channels});
  auto mean_tensor = CreateInTensor("./test_data/bngrad/save_mean_3.bin", {1, 1, 1, channels});
  auto var_tensor = CreateInTensor("./test_data/bngrad/save_var_3.bin", {1, 1, 1, channels});
  // prepare output tensors
  lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels});
  dx_tensor.MallocData();
  lite::tensor::Tensor dscale_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels});
  dscale_tensor.MallocData();
  lite::tensor::Tensor dbias_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels});
  dbias_tensor.MallocData();

  std::vector<lite::tensor::Tensor *> inputs = {dy_tensor, x_tensor, scale_tensor, mean_tensor, var_tensor};
  std::vector<lite::tensor::Tensor *> outputs = {&dx_tensor, &dscale_tensor, &dbias_tensor};

  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad};

  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bn_param), NULL, desc, nullptr);

  for (int i = 0; i < 3; i++) {
    kernel_obj->Run();
  }

  int loop_count = 100;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    kernel_obj->Run();
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  auto time_avg = cost / loop_count;
  std::cout << "single thread running time : " << time_avg << "us\n";
  std::cout << "==========dx==========\n";
  auto dx = reinterpret_cast<float *>(outputs[0]->Data());
  for (int i = 0; i < 7; i++) std::cout << dx[i] << " ";
  std::cout << "\n=======dscale=======\n";
  auto dscale = reinterpret_cast<float *>(outputs[1]->Data());
  for (int i = 0; i < channels; i++) std::cout << dscale[i] << " ";
  std::cout << "\n";
  int res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin");
  EXPECT_EQ(res, 0);
  std::cout << "==========dbias==========\n";
  auto dbias = reinterpret_cast<float *>(outputs[2]->Data());
  for (int i = 0; i < 3; i++) std::cout << dbias[i] << " ";
  std::cout << "\n";
  res = mindspore::lite::CompareRelativeOutput(dscale, "./test_data/bngrad/output_dscale_3.bin");
  EXPECT_EQ(res, 0);
  for (auto v : inputs) {
    delete[] reinterpret_cast<float *>(v->Data());
    v->SetData(nullptr);
    // delete v;
  }
  delete kernel_obj;
  MS_LOG(INFO) << "BNGradFp32 passed";
}
}  // namespace mindspore
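For reference, the dbias and dscale outputs checked above correspond to the standard batch-norm gradients (textbook formulas, not taken from this diff), with per-channel normalized input:

\hat{x}_{nhwc} = \frac{x_{nhwc} - \mu_c}{\sqrt{\sigma_c^2 + \epsilon}}, \qquad
\text{dbias}_c = \sum_{n,h,w} \frac{\partial L}{\partial y_{nhwc}}, \qquad
\text{dscale}_c = \sum_{n,h,w} \frac{\partial L}{\partial y_{nhwc}} \, \hat{x}_{nhwc}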
@@ -21,6 +21,7 @@
#include "common/common_test.h"
#include "src/common/file_utils.h"
#include "src/common/file_utils_ext.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h"
#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h"
#include "mindspore/lite/nnacl/conv_parameter.h"
@@ -130,11 +131,14 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
  EXPECT_EQ(res, 0);

  // delete input_data;
  // delete dy_data;
  // delete [] dw_data;
  delete [] input_data;
  delete [] dy_data;
  delete [] dw_data;
  delete kernel;
  delete conv_param;
  // delete conv_param;
  dw_tensor.SetData(nullptr);
  x_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);
  MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed";
}

@@ -193,9 +197,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
  std::string output_path = "./test_data/conv/convfp32_dx_1_28_28_3.bin";
  auto res = lite::CompareRelativeOutput(dx_data, output_path);
  EXPECT_EQ(res, 0);

  delete [] dx_data;
  delete [] w_data;
  delete [] dy_data;
  w_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);
  dx_tensor.SetData(nullptr);
  delete kernel;
  delete conv_param;
  // delete conv_param;

  MS_LOG(INFO) << "TestConvolutionGradFp32 Input Grad passed";
}

@@ -254,11 +264,14 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
  auto res = lite::CompareRelativeOutput(dw_data, output_path);
  EXPECT_EQ(res, 0);

  // delete input_data;
  // delete dy_data;
  // delete [] dw_data;
  delete [] input_data;
  delete [] dy_data;
  delete [] dw_data;
  dw_tensor.SetData(nullptr);
  x_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);
  delete kernel;
  delete conv_param;
  // delete conv_param;
  MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed";
}

@@ -317,9 +330,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
  std::string output_path = "./test_data/conv/convfp32_dx_g3_1_28_28_3.bin";
  auto res = lite::CompareRelativeOutput(dx_data, output_path);
  EXPECT_EQ(res, 0);
  delete [] dx_data;
  delete [] w_data;
  delete [] dy_data;
  dx_tensor.SetData(nullptr);
  w_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);

  delete kernel;
  delete conv_param;
  // delete conv_param;
  MS_LOG(INFO) << "TestConvolutionGradFp32 Input Grad passed";
}

@@ -378,11 +397,14 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
  std::string output_path = "./test_data/conv/convfp32_dw_g3_d2_18_3_3_3.bin";
  auto res = lite::CompareRelativeOutput(dw_data, output_path);
  EXPECT_EQ(res, 0);
  // delete input_data;
  // delete dy_data;
  // delete [] dw_data;
  delete [] input_data;
  delete [] dy_data;
  delete [] dw_data;
  dw_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);
  x_tensor.SetData(nullptr);
  delete kernel;
  delete conv_param;
  // delete conv_param;
  MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed";
}

@@ -441,80 +463,93 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
  std::string output_path = "./test_data/conv/convfp32_dx_g3_d2_1_28_28_3.bin";
  auto res = lite::CompareRelativeOutput(dx_data, output_path);
  EXPECT_EQ(res, 0);

  delete [] dx_data;
  delete [] w_data;
  delete [] dy_data;
  dx_tensor.SetData(nullptr);
  dy_tensor.SetData(nullptr);
  w_tensor.SetData(nullptr);
  delete kernel;
  delete conv_param;
  // delete conv_param;
  MS_LOG(INFO) << "TestConvolutionGradFp32 Input Grad passed";
}

// TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
//   // prepare stage
//   auto conv_param = new ConvParameter();
//   InitConvParamGroup3Dilation2FP32(conv_param);
TEST_F(TestConvolutionGradFp32, ConvGroupDilation) {
  // prepare stage
  auto conv_param = new ConvParameter();
  InitConvParamGroup3Dilation2FP32(conv_param);

//   size_t x_size;
//   std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin";
//   auto x_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(x_path.c_str(), &x_size));
//   std::vector<int> dim_x({1, 28, 28, 3});
//   tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
//   x_tensor.SetData(x_data);
  size_t x_size;
  std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin";
  auto x_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(x_path.c_str(), &x_size));
  std::vector<int> dim_x({1, 28, 28, 3});
  lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
  x_tensor.SetData(x_data);

//   size_t w_size;
//   std::string w_path = "./test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin";
//   auto w_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(w_path.c_str(), &w_size));
//   std::vector<int> dim_w({18, 3, 3, 1});
//   tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w);
//   w_tensor.SetData(w_data);
  size_t w_size;
  std::string w_path = "./test_data/conv/convfp32_w_g3_d2_18_3_3_3.bin";
  auto w_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(w_path.c_str(), &w_size));
  std::vector<int> dim_w({18, 3, 3, 1});
  lite::tensor::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w);
  w_tensor.SetData(w_data);

//   size_t output_data_size =
//     conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_;
//   auto y_data = new float[output_data_size];
//   std::vector<int> dim_y({1, 26, 26, 18});
//   tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
//   y_tensor.SetData(y_data);
  size_t output_data_size =
    conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_;
  auto y_data = new float[output_data_size];
  std::vector<int> dim_y({1, 26, 26, 18});
  lite::tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
  y_tensor.SetData(y_data);

//   std::vector<tensor::Tensor *> inputs = {&x_tensor, &w_tensor};
//   std::vector<tensor::Tensor *> outputs = {&y_tensor};
//   // runtime part
  std::vector<lite::tensor::Tensor *> inputs = {&x_tensor, &w_tensor};
  std::vector<lite::tensor::Tensor *> outputs = {&y_tensor};
  // runtime part

//   printf("Calculating runtime cost...\n");
//   uint64_t time_avg = 0;
  printf("Calculating runtime cost...\n");
  uint64_t time_avg = 0;

//   lite::Context context;
//   ;
//   context.deviceCtx.type = lite::DT_CPU;
//   context.threadNum = 1;
  lite::Context context;
  context.device_ctx_.type = lite::DT_CPU;
  context.thread_num_ = 1;

//   kernel::KernelKey desc = {kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Conv2D};
//   auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc);
//   auto kernel = creator(inputs, outputs, (OpParameter *)conv_param, &context, desc);

//   kernel->train();
//   EXPECT_EQ(kernel->is_train(), 1);
  auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast<OpParameter *>(conv_param),
                                                                  inputs, outputs, &context, 0);
  kernel->Init();
  // kernel::KernelKey desc = {kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Conv2D};
  // auto creator = lite::KernelRegistry::GetInstance()->GetKernelCreator(desc);
  // auto kernel = creator(inputs, outputs, (OpParameter *)conv_param, &context, desc);

//   // warm up loop
//   for (int i = 0; i < 3; i++) {
//     kernel->Run();
//   }
  kernel->train();
  EXPECT_EQ(kernel->is_train(), 1);

//   int loop_count = 100;
//   auto time_start = mindspore::lite::GetTimeUs();
//   for (int i = 0; i < loop_count; i++) {
//     kernel->Run();
//   }
//   auto time_end = mindspore::lite::GetTimeUs();
//   auto cost = time_end - time_start;
//   time_avg = cost / loop_count;
//   printf("single thread running time : %f ms\n", time_avg / 1000.0f);
  // warm up loop
  for (int i = 0; i < 3; i++) {
    kernel->Run();
  }

//   std::string output_path = "./test_data/conv/convfp32_y_g3_d2_1_26_26_18.bin";
//   auto res = lite::CompareRelativeOutput(y_data, output_path);
//   EXPECT_EQ(res, 0);
  int loop_count = 100;
  auto time_start = mindspore::lite::GetTimeUs();
  for (int i = 0; i < loop_count; i++) {
    kernel->Run();
  }
  auto time_end = mindspore::lite::GetTimeUs();
  auto cost = time_end - time_start;
  time_avg = cost / loop_count;
  printf("single thread running time : %f ms\n", time_avg / 1000.0f);

//   delete kernel;
//   delete conv_param;
  std::string output_path = "./test_data/conv/convfp32_y_g3_d2_1_26_26_18.bin";
  auto res = lite::CompareRelativeOutput(y_data, output_path);
  EXPECT_EQ(res, 0);

//   MS_LOG(INFO) << "TestConvolutionFp32 Filter Grad passed";
// }
  delete [] y_data;
  delete [] x_data;
  delete [] w_data;
  x_tensor.SetData(nullptr);
  y_tensor.SetData(nullptr);
  w_tensor.SetData(nullptr);
  delete kernel;

  MS_LOG(INFO) << "TestConvolutionFp32 Filter Grad passed";
}

}  // namespace mindspore
@@ -0,0 +1,564 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <dirent.h>
#include <climits>
#include <cmath>
#include <iostream>
#include <fstream>
#include <memory>
#include <string>
#include <functional>

#include "mindspore/lite/schema/inner/model_generated.h"
#include "mindspore/lite/include/model.h"
#include "common/common_test.h"
#include "include/train_session.h"
// #include "include/lite_session.h"
#include "include/context.h"
#include "include/errorcode.h"
#include "utils/log_adapter.h"
#include "src/common/file_utils.h"
#include "src/common/file_utils_ext.h"

namespace mindspore {
class NetworkTest : public mindspore::CommonTest {
 public:
  NetworkTest() {}
};

// INPUT(0)
// V
// +-------------+
// |    ReLU     |
// +-------------+
// +---output(1) V
// |   V V weights(2) <----+
// |   +-------------+     |
// |   |   MatMul    |     |
// |   +-------------+     |
// |   output(3) V         |
// |   V V weights(4)<-+   |
// |   +-------------+ |   |
// |   |    Bias     | |   |
// |   +-------------+ |   |
// |   output(5) V      |   |
// |   V V LABELS(6)    |   |
// |   +-------------+  |   |
// |   | CrossEntropy|  |   |
// |   +-------------+  |   |
// | +-dy(7) V V------------------------->Loss (14)
// | |       V           |   |
// | | +-------------+   |   |
// | | |  BiasGrad   |   |   |
// | | +-------------+   |   |
// | |       V db(8)     |   |
// | |       +--------Update---+
// | +-------+                 |
// +------V  V                 |
//     +-------------+         |
//     |   MatMul    |         |
//     +-------------+         |
//          V dw(9)            |
//          +-----------Update-----+

TEST_F(NetworkTest, tuning_layer) {
  const int BATCH_SIZE = 32;
  const int NUM_CLASSES = 10;
  const int FEATURE_SIZE = 1000;
  auto meta_graph = std::make_shared<schema::MetaGraphT>();
  meta_graph->name = "graph";
  // define nodes
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {0};
    node->outputIndex = {1};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_Activation;
    auto primitive = new schema::ActivationT;
    primitive->type = schema::ActivationType_RELU;
    node->primitive->value.value = primitive;
    node->name = "ReLU";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {1, 2};
    node->outputIndex = {3};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_MatMul;
    auto primitive = new schema::MatMulT;
    primitive->transposeA = false;
    primitive->transposeB = true;
    node->primitive->value.value = primitive;
    node->name = "MatMul1";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {3, 4};
    node->outputIndex = {5};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_BiasAdd;
    auto primitive = new schema::BiasAddT;
    primitive->axis.push_back(0);
    node->primitive->value.value = primitive;
    node->name = "BiasAdd";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {5, 6};
    node->outputIndex = {14, 7};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_SoftmaxCrossEntropy;
    auto primitive = new schema::SoftmaxCrossEntropyT;
    primitive->axis.push_back(0);
    node->primitive->value.value = primitive;
    node->name = "SoftmaxCrossEntropy";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {7};
    node->outputIndex = {8};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_BiasGrad;
    auto primitive = new schema::BiasGradT;
    primitive->axis.push_back(0);
    node->primitive->value.value = primitive;
    node->name = "BiasGrad";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {7, 1};
    node->outputIndex = {9};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_MatMul;
    auto primitive = new schema::MatMulT;
    primitive->transposeA = true;
    primitive->transposeB = false;
    node->primitive->value.value = primitive;
    node->name = "MatMul2";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {2, 10, 11, 9, 12};
    node->outputIndex = {};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_ApplyMomentum;
    auto primitive = new schema::ApplyMomentumT;
    node->primitive->value.value = primitive;
    node->name = "Momentum";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  {
    auto node = std::make_unique<schema::CNodeT>();
    node->inputIndex = {4, 13, 11, 8, 12};
    node->outputIndex = {};
    node->primitive = std::make_unique<schema::PrimitiveT>();
    node->primitive->value.type = schema::PrimitiveType_ApplyMomentum;
    auto primitive = new schema::ApplyMomentumT;
    node->primitive->value.value = primitive;
    node->name = "Momentum";
    meta_graph->nodes.emplace_back(std::move(node));
  }
  meta_graph->inputIndex = {6, 0};  // XXX TODO why is it reverse?
  meta_graph->outputIndex = {5, 14};
  const int NUM_OF_OUTPUTS = 2;

  auto input0 = std::make_unique<schema::TensorT>();
  input0->nodeType = schema::NodeType::NodeType_ValueNode;
  input0->format = schema::Format_NHWC;
  input0->dataType = TypeId::kNumberTypeFloat32;
  input0->dims = {BATCH_SIZE, FEATURE_SIZE};
  input0->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input0));
  // tensor 1 - relu
  auto relu_out = std::make_unique<schema::TensorT>();
  relu_out->nodeType = schema::NodeType::NodeType_Parameter;
  relu_out->format = schema::Format_NHWC;
  relu_out->dataType = TypeId::kNumberTypeFloat32;
  relu_out->dims = {BATCH_SIZE, FEATURE_SIZE};
  relu_out->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(relu_out));
  // tensor 2 - matmul weights
  auto weight = std::make_unique<schema::TensorT>();
  weight->nodeType = schema::NodeType::NodeType_ValueNode;
  weight->format = schema::Format_KHWC;
  weight->dataType = TypeId::kNumberTypeFloat32;
  weight->dims = {NUM_CLASSES, FEATURE_SIZE};
  size_t weight_size;
  char *buf;
  std::string weight_path = "./test_data/train/train_weight_10_1000.bin";
  ReadFile(weight_path.c_str(), &weight_size, &buf);
  ASSERT_NE(nullptr, buf);
  weight->data.resize(weight_size);
  std::copy(buf, buf + weight_size, weight->data.data());
  meta_graph->allTensors.emplace_back(std::move(weight));
  // tensor 3 - matmul
  auto input3 = std::make_unique<schema::TensorT>();
  input3->nodeType = schema::NodeType::NodeType_Parameter;
  input3->format = schema::Format_NHWC;
  input3->dataType = TypeId::kNumberTypeFloat32;
  input3->dims = {BATCH_SIZE, NUM_CLASSES};
  input3->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input3));
  // tensor 4 - fc bias
  auto bias = std::make_unique<schema::TensorT>();
  bias->nodeType = schema::NodeType::NodeType_ValueNode;
  bias->format = schema::Format_NHWC;
  bias->dataType = TypeId::kNumberTypeFloat32;
  bias->dims = {NUM_CLASSES};
  bias->offset = -1;
  std::string bias_path = "./test_data/train/train_bias_10.bin";
  size_t bias_size;
  ReadFile(bias_path.c_str(), &bias_size, &buf);
  ASSERT_NE(nullptr, buf);
  bias->data.resize(bias_size);
  std::copy(buf, buf + bias_size, bias->data.data());
  meta_graph->allTensors.emplace_back(std::move(bias));

  // tensor 5 - bias_add
  auto input5 = std::make_unique<schema::TensorT>();
  input5->nodeType = schema::NodeType::NodeType_Parameter;
  input5->format = schema::Format_NHWC;
  input5->dataType = TypeId::kNumberTypeFloat32;
  input5->dims = {BATCH_SIZE, NUM_CLASSES};
  input5->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input5));
  // tensor 6 - Label
  {
    auto label = std::make_unique<schema::TensorT>();
    label->nodeType = schema::NodeType::NodeType_ValueNode;
    label->format = schema::Format_NHWC;
    label->dataType = TypeId::kNumberTypeInt32;
    label->dims = {BATCH_SIZE};
    label->offset = -1;
    label->data.resize(BATCH_SIZE * NUM_CLASSES * sizeof(float));
    int *data = reinterpret_cast<int *>(label->data.data());
    for (int i = 0; i < BATCH_SIZE; i++)
      for (int j = 0; j < NUM_CLASSES; j++) *(data + i * NUM_CLASSES + j) = j;
    meta_graph->allTensors.emplace_back(std::move(label));
  }
  // tensor 7 - softmax cross entropy dy
  auto input7 = std::make_unique<schema::TensorT>();
  input7->nodeType = schema::NodeType::NodeType_Parameter;
  input7->format = schema::Format_NHWC;
  input7->dataType = TypeId::kNumberTypeFloat32;
  input7->dims = {BATCH_SIZE, NUM_CLASSES};
  input7->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input7));
  // tensor 8 - biasGrad
  auto input8 = std::make_unique<schema::TensorT>();
  input8->nodeType = schema::NodeType::NodeType_Parameter;
  input8->format = schema::Format_NHWC;
  input8->dataType = TypeId::kNumberTypeFloat32;
  input8->dims = {NUM_CLASSES};
  input8->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input8));
  // tensor 9 - matmul2
  auto input9 = std::make_unique<schema::TensorT>();
  input9->nodeType = schema::NodeType::NodeType_Parameter;
  input9->format = schema::Format_NHWC;
  input9->dataType = TypeId::kNumberTypeFloat32;
  input9->dims = {NUM_CLASSES, FEATURE_SIZE};
  input9->offset = -1;
  meta_graph->allTensors.emplace_back(std::move(input9));
  // tensor 10 - weights accumulate
  auto input10 = std::make_unique<schema::TensorT>();
  input10->nodeType = schema::NodeType::NodeType_ValueNode;
  input10->format = schema::Format_NHWC;
  input10->dataType = TypeId::kNumberTypeFloat32;
  input10->dims = {NUM_CLASSES, FEATURE_SIZE};
  input10->offset = -1;
  size_t input10_size = NUM_CLASSES * FEATURE_SIZE * sizeof(float);
  input10->data.resize(input10_size);
  std::fill(input10->data.data(), input10->data.data() + input10_size, 0.f);
  meta_graph->allTensors.emplace_back(std::move(input10));
  // tensor 11 - lr
  {
    auto lr = std::make_unique<schema::TensorT>();
    lr->nodeType = schema::NodeType::NodeType_ValueNode;
    lr->format = schema::Format_NHWC;
    lr->dataType = TypeId::kNumberTypeFloat32;
    lr->dims = {1};
    lr->offset = -1;
    lr->data.resize(sizeof(float));
    float *data = reinterpret_cast<float *>(lr->data.data());
    *data = 0.01f;
    meta_graph->allTensors.emplace_back(std::move(lr));
  }
  // tensor 12 - momentum
  {
    auto input12 = std::make_unique<schema::TensorT>();
    input12->nodeType = schema::NodeType::NodeType_ValueNode;
    input12->format = schema::Format_NHWC;
    input12->dataType = TypeId::kNumberTypeFloat32;
    input12->dims = {1};
    input12->offset = -1;
    input12->data.resize(sizeof(float));
    float *data = reinterpret_cast<float *>(input12->data.data());
    *data = 0.f;
    meta_graph->allTensors.emplace_back(std::move(input12));
  }
  // tensor 13 - bias accumulate
  auto input13 = std::make_unique<schema::TensorT>();
  input13->nodeType = schema::NodeType::NodeType_ValueNode;
  input13->format = schema::Format_NHWC;
  input13->dataType = TypeId::kNumberTypeFloat32;
  input13->dims = {NUM_CLASSES};
  input13->offset = -1;
  size_t input13_size = NUM_CLASSES * sizeof(float);
  input13->data.resize(input13_size);
  std::fill(input13->data.data(), input13->data.data() + input13_size, 0.f);
  meta_graph->allTensors.emplace_back(std::move(input13));

  // tensor 14 - loss
  {
    auto loss14 = std::make_unique<schema::TensorT>();
    loss14->nodeType = schema::NodeType::NodeType_ValueNode;
    loss14->format = schema::Format_NHWC;
    loss14->dataType = TypeId::kNumberTypeFloat32;
    loss14->dims = {1};
    loss14->offset = -1;
    loss14->data.resize(sizeof(float));
    float *data = reinterpret_cast<float *>(loss14->data.data());
    *data = 0.0f;
    meta_graph->allTensors.emplace_back(std::move(loss14));
  }

  //================================================================
  buf = nullptr;

  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, meta_graph.get());
  builder.Finish(offset);
  size_t size = builder.GetSize();
  const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
  std::cout << "build fb size= " << size << "\n";

#if 0  // EXPORT_FILE
  std::string path = std::string("hcdemo_train.fb");
  std::ofstream ofs(path);
  ASSERT_EQ(true, ofs.good());
  ASSERT_EQ(true, ofs.is_open());

  ofs.seekp(0, std::ios::beg);
  ofs.write(content, size);
  ofs.close();
#endif

  auto model = lite::Model::Import(content, size);
  ASSERT_NE(nullptr, model);
  meta_graph.reset();
  content = nullptr;
  auto context = new lite::Context;
  context->device_ctx_.type = lite::DT_CPU;
  context->cpu_bind_mode_ = lite::NO_BIND;
  context->thread_num_ = 1;
  auto session = new session::TrainSession();
  ASSERT_NE(nullptr, session);
  session->Init(context);
  auto ret = session->CompileGraph(model);
  ASSERT_EQ(lite::RET_OK, ret);
  session->train();

  auto inputs = session->GetInputs();
  ASSERT_EQ(inputs.size(), 2);
  auto inTensor = inputs.at(0);
  ASSERT_NE(nullptr, inTensor);
  auto data = inTensor->MutableData();
  //===================================================
  size_t input_size;
  std::string input_path = "./test_data/train/train_input_32_1000.bin";
  ReadFile(input_path.c_str(), &input_size, &buf);
  ASSERT_NE(nullptr, buf);
  auto input_data = reinterpret_cast<float *>(buf);
  ASSERT_NE(nullptr, input_data);
  //===================================================
  ASSERT_EQ(input_size, inTensor->Size());
  memcpy(data, input_data, input_size);

  auto labelTensor = inputs.at(1);
  ASSERT_NE(nullptr, labelTensor);
  ASSERT_EQ(BATCH_SIZE, labelTensor->ElementsNum());
  auto labels = reinterpret_cast<int *>(labelTensor->MutableData());
  for (int i = 0; i < BATCH_SIZE; i++) labels[i] = (i * 97) % NUM_CLASSES;

  ret = session->RunGraph();
  ASSERT_EQ(lite::RET_OK, ret);
  auto outputs = session->GetOutputsByName("BiasAdd");
  ASSERT_EQ(outputs.size(), 1);
  auto outTensor = (outputs.at(0));
  ASSERT_NE(nullptr, outTensor);
  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
  auto *outData = reinterpret_cast<float *>(outTensor->MutableData());
  ASSERT_NE(nullptr, outData);
  std::cout << "========================dW=====================" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << outData[i] << ", ";
  }
  std::cout << std::endl;
  ret = session->RunGraph();
  outputs = session->GetOutputsByName("BiasAdd");
  ASSERT_EQ(outputs.size(), 1);
  outTensor = (outputs.at(0));
  ASSERT_NE(nullptr, outTensor);
  // ASSERT_EQ(28 * 28 * 32, outTensor->ElementsNum());
  ASSERT_EQ(TypeId::kNumberTypeFloat32, outTensor->data_type());
  outData = reinterpret_cast<float *>(outTensor->MutableData());
  ASSERT_NE(nullptr, outData);
  std::cout << "========================dW=====================" << std::endl;
  for (int i = 0; i < 20; i++) {
    std::cout << outData[i] << ", ";
  }
  //===================================================
#if 0
  size_t output_size;
  std::string output_path = "./convfp32_out_1_28_28_32.bin";
  buf = mindspore::lite::ReadFile(output_path.c_str(), &output_size);
  ASSERT_NE(nullptr, buf);
  auto output_data = reinterpret_cast<float *>(buf);
  ASSERT_NE(nullptr, output_data);
  //===================================================
  ASSERT_EQ(output_size, runOutput->Size());
  for (size_t i = 0; i < runOutput->ElementsNum(); i++) {
    ASSERT_EQ(output_data[i], outData[i]);
  }
#endif
  MS_LOG(INFO) << "Passed";
}

int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path,
                     std::function<int32_t(mindspore::session::TrainSession *session,
                                           const std::string &)> cb) {
  int32_t res = 0;
  if (auto dir = opendir(path.c_str())) {
    while (auto f = readdir(dir)) {
      if (!f->d_name || f->d_name[0] == '.') continue;
      if (f->d_type == DT_DIR) fileIterator(session, path + f->d_name + "/", cb);

      if (f->d_type == DT_REG)
        res |= cb(session, path + f->d_name);
    }
    closedir(dir);
  }
  return res;
}
#if 0
void replaceExt(const std::string &src, std::string *dst) {
  *dst = src.substr(0, src.find_last_of('.')) + ".emb";
}
#endif
int32_t runEffNet(mindspore::session::TrainSession *session, const std::string &in, const std::string &out) {
  // setup input
  auto inputs = session->GetInputs();
  // ASSERT_EQ(inputs.size(), 1);
  auto inTensor = inputs.at(0);
  // ASSERT_NE(nullptr, inTensor);
  float *data = reinterpret_cast<float *>(inTensor->MutableData());

  size_t input_size;
  float *in_buf = reinterpret_cast<float *>(lite::ReadFile(in.c_str(), &input_size));
  // ASSERT_NE(nullptr, data);
  auto input_data = reinterpret_cast<float *>(in_buf);
  // ASSERT_EQ(input_size, inTensor->Size());
  std::copy(input_data, input_data + inTensor->ElementsNum(), data);

  // execute network
  session->RunGraph();

  // compare outputs
  auto outputs = session->GetOutputs();
  auto output = ((outputs.begin())->second);
  float *output_data = reinterpret_cast<float *>(output.at(0)->MutableData());

  return mindspore::lite::CompareRelativeOutput(output_data, out.c_str());
}

TEST_F(NetworkTest, efficient_net) {
  const int NUM_OF_INPUTS = 1;
  char *buf = nullptr;
  size_t net_size = 0;
  std::string net = "./test_data/nets/efficientnet_b0_f.ms";
  ReadFile(net.c_str(), &net_size, &buf);
  auto model = lite::Model::Import(buf, net_size);
  auto context = new lite::Context;
  context->device_ctx_.type = lite::DT_CPU;
  context->cpu_bind_mode_ = lite::NO_BIND;
  context->thread_num_ = 1;

  auto session = new mindspore::session::TrainSession();
  ASSERT_NE(session, nullptr);
  auto ret = session->Init(context);
  ASSERT_EQ(lite::RET_OK, ret);
  ret = session->CompileGraph(model);
  ASSERT_EQ(lite::RET_OK, ret);
  session->eval();

#if 0
  std::string path = "/opt/share/MiniBinEmbDataset/";
  auto res = fileIterator(session, path, [](mindspore::session::TrainSession *session, const std::string &in) {
    int32_t res = 0;
    if (in.find(".bin") != std::string::npos) {
      std::string out;
      replaceExt(in, &out);
      res = runEffNet(session, in, out);
      std::cout << "input file: " << in << (res ? " Fail" : " Pass") << std::endl;
    }
    return res;
  });
#else
  std::string in = "./test_data/nets/effNet_input_x_1_3_224_224.bin";
  std::string out = "./test_data/nets/effNet_output_y_1_1000.bin";
  auto res = runEffNet(session, in, out);
#endif
  // auto inputs = session->GetInputs();
  // ASSERT_EQ(inputs.size(), NUM_OF_INPUTS);
  // auto inTensor = inputs.at(0);
  // ASSERT_NE(nullptr, inTensor);
  // float *data = reinterpret_cast<float *>(inTensor->MutableData());

  // // fill input
  // std::string input_path = "./test_data/nets/effNet_input_x_1_3_224_224.bin";
  // // std::string input_path = "/opt/share/MiniBinEmbDataset/2_pet/n02099601_3111.bin";
  // size_t input_size;
  // char *in_buf = nullptr;
  // ReadFile(input_path.c_str(), &input_size, &in_buf);
  // ASSERT_NE(nullptr, data);
  // auto input_data = reinterpret_cast<float *>(in_buf);
  // ASSERT_EQ(input_size, inTensor->Size());
  // std::copy(input_data, input_data+inTensor->ElementsNum(), data);

  // // execute network
  // ret = session->RunGraph();

  // // compare outputs
  // std::string output_path = "./test_data/nets/effNet_output_y_1_1000.bin";
  // // std::string output_path = "/opt/share/MiniBinEmbDataset/2_pet/n02099601_3111.emb";
  // auto outputs = session->GetOutputs();
  // auto output = ((outputs.begin())->second);
  // float* output_data = reinterpret_cast<float *>(output.at(0)->MutableData());
  // int res = lite::CompareRelativeOutput(output_data, output_path);
  ASSERT_EQ(res, 0);
}

}  // namespace mindspore
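Condensed, the round trip these network tests exercise is: build a schema::MetaGraphT in memory, pack it with flatbuffers, and re-import it as a lite::Model. A one-node sketch (a complete graph would also need the matching TensorT entries, as in tuning_layer above):

#include "mindspore/lite/schema/inner/model_generated.h"
#include "mindspore/lite/include/model.h"

mindspore::lite::Model *BuildTinyModel() {
  auto meta_graph = std::make_shared<mindspore::schema::MetaGraphT>();
  meta_graph->name = "graph";
  auto node = std::make_unique<mindspore::schema::CNodeT>();
  node->inputIndex = {0};
  node->outputIndex = {1};
  node->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
  node->primitive->value.type = mindspore::schema::PrimitiveType_Activation;
  auto primitive = new mindspore::schema::ActivationT;
  primitive->type = mindspore::schema::ActivationType_RELU;
  node->primitive->value.value = primitive;
  node->name = "ReLU";
  meta_graph->nodes.emplace_back(std::move(node));
  meta_graph->inputIndex = {0};
  meta_graph->outputIndex = {1};
  // tensors 0 and 1 omitted here; see the TensorT blocks in tuning_layer above
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = mindspore::schema::MetaGraph::Pack(builder, meta_graph.get());
  builder.Finish(offset);
  return mindspore::lite::Model::Import(
      reinterpret_cast<const char *>(builder.GetBufferPointer()), builder.GetSize());
}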
@ -22,6 +22,7 @@
|
|||
#include "mindspore/lite/src/kernel_registry.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/file_utils_ext.h"
|
||||
#include "src/runtime/kernel/arm/fp32_grad/pooling_grad.h"
|
||||
#include "nnacl/fp32_grad/pooling_grad.h"
|
||||
|
||||
|
@ -60,6 +61,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
|
|||
auto pooling_param = new PoolingParameter();
|
||||
InitPoolingParamFP32(pooling_param);
|
||||
pooling_param->output_channel_ = 3;
|
||||
pooling_param->pool_mode_ = PoolMode_AvgPool;
|
||||
|
||||
// runtime part
|
||||
printf("Calculating runtime cost...\n");
|
||||
|
@ -95,7 +97,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) {
|
|||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
|
||||
delete input_data;
|
||||
delete[] input_data;
|
||||
delete[] output_data;
|
||||
delete pooling_param;
|
||||
MS_LOG(INFO) << "TestAvgPoolingGradFp32 passed";
|
||||
|
@ -122,10 +124,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
|||
dy_tensor.SetData(input_data);
|
||||
|
||||
std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin";
|
||||
input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
|
||||
auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
|
||||
std::vector<int> dim_x({1, 28, 28, 3});
|
||||
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
|
||||
x_tensor.SetData(input_data);
|
||||
x_tensor.SetData(input1_data);
|
||||
|
||||
std::vector<lite::tensor::Tensor *> inputs = {&dy_tensor, &x_tensor};
|
||||
|
||||
|
@ -150,12 +152,205 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
|||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_1_28_28_3.bin";
|
||||
lite::CompareOutput(output_data, output_path);
|
||||
|
||||
// delete input_data;
|
||||
// delete[] output_data;
|
||||
delete pooling_param;
|
||||
delete[] input_data;
|
||||
delete[] input1_data;
|
||||
delete[] output_data;
|
||||
dx_tensor.SetData(nullptr);
|
||||
x_tensor.SetData(nullptr);
|
||||
dy_tensor.SetData(nullptr);
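// The heap buffers were already freed above; clearing the tensor data
// pointers keeps the Tensor destructors from double-freeing them.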
// delete pooling_param;
delete kernel_obj;
MS_LOG(INFO) << "TestAvgPoolingKernelGradFp32 passed";
}

TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
// prepare stage
auto pooling_param = new PoolingParameter();
InitPoolingParamFP32(pooling_param);

pooling_param->output_channel_ = 3;
pooling_param->input_batch_ = 3;
pooling_param->output_batch_ = 3;

// runtime part
printf("Calculating runtime cost...\n");
// uint64_t time_avg = 0;
size_t output_data_size =
pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->input_h_ * pooling_param->input_w_;
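// output_data_size above sizes dx, the gradient w.r.t. the forward input;
// dx keeps the input's spatial shape, hence input_h_ * input_w_ rather
// than the forward output dims.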

size_t input_size;
std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin";
auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
std::vector<int> dim_dy({1, 28, 28, 3});
lite::tensor::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
dy_tensor.SetData(input_data);

std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin";
auto input1_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1_path.c_str(), &input_size));
std::vector<int> dim_x({1, 28, 28, 3});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(input1_data);

std::vector<lite::tensor::Tensor *> inputs = {&dy_tensor, &x_tensor};

auto output_data = new float[output_data_size];
std::vector<int> dim_dx({1, 28, 28, 3});
lite::tensor::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx);
dx_tensor.SetData(output_data);
std::vector<lite::tensor::Tensor *> outputs = {&dx_tensor};
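// Kernel lookup pattern used by these tests: a KernelKey (arch, dtype,
// op type) selects a registered creator, which instantiates the CPU kernel
// bound to the given tensors and OpParameter.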

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};

auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), NULL, desc, nullptr);

kernel_obj->Run();

printf("==================output data=================\n");
for (int i = 0; i < 20; i++) {
std::cout << output_data[i] << " ,";
}
std::cout << std::endl;
std::string output_path = "./test_data/pooling/avgpoolgradfp32_1_dx_3_28_28_3.bin";
lite::CompareOutput(output_data, output_path);

delete[] input_data;
delete[] input1_data;
delete[] output_data;
dx_tensor.SetData(nullptr);
x_tensor.SetData(nullptr);
dy_tensor.SetData(nullptr);
// delete pooling_param;
delete kernel_obj;
MS_LOG(INFO) << "TestAvgPoolingGradBatchFp32 passed";
}

TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
// prepare stage
// the incoming gradient dy has the forward output's size; the computed dx matches the original size of x
auto pool = new PoolingParameter();
InitPoolingParamFP32(pool);
pool->output_channel_ = 3;
pool->pool_mode_ = PoolMode_AvgPool;
pool->input_batch_ = 3;
pool->output_batch_ = 3;
pool->output_h_ = 14;
pool->output_w_ = 14;
pool->stride_h_ = 2;
pool->stride_w_ = 2;
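// With 28x28 inputs and stride 2, the forward output set above is 14x14:
// dy is [3, 14, 14, 3], while dx is allocated with the shape of x,
// [3, 28, 28, 3].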

size_t input_size;
size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_;

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(x_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size));
std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_});
lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.SetData(yt_data);

auto out_data = new float[y_data_size];
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.SetData(out_data);

std::vector<lite::tensor::Tensor *> inputs = {&yt_tensor, &x_tensor};
std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};
// ----------------------------------------
kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc);
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), NULL, pool_desc, nullptr);

kernel->Init();

auto time_start = mindspore::lite::GetTimeUs();
kernel->Run();
auto time_end = mindspore::lite::GetTimeUs();
printf("single thread running time : %ld us\n", time_end - time_start);

std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin";
auto res = lite::CompareRelativeOutput(out_data, output_path);

EXPECT_EQ(res, 0);

delete[] x_data;
delete[] yt_data;
// delete[] out_data;
// delete conv_param;
x_tensor.SetData(nullptr);
yt_tensor.SetData(nullptr);
out_tensor.SetData(nullptr);
delete kernel;
MS_LOG(INFO) << "AvgPoolGradStride2Fp32 passed";
}

TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
// prepare stage
// the incoming gradient dy has the forward output's size; the computed dx matches the original size of x
auto pool = new PoolingParameter();
InitPoolingParamFP32(pool);
pool->output_channel_ = 3;
pool->pool_mode_ = PoolMode_AvgPool;
pool->input_batch_ = 3;
pool->output_batch_ = 3;
pool->output_h_ = 10;
pool->output_w_ = 10;
pool->stride_h_ = 3;
pool->stride_w_ = 3;
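// Stride 3 over the same 28x28 maps yields the 10x10 forward output set
// above, so dy is [3, 10, 10, 3] and dx again matches x at [3, 28, 28, 3].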

size_t input_size;
size_t y_data_size = pool->output_batch_ * pool->output_channel_ * pool->input_h_ * pool->input_w_;

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
std::vector<int> dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(x_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size));
std::vector<int> dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_});
lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.SetData(yt_data);

auto out_data = new float[y_data_size];
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.SetData(out_data);

std::vector<lite::tensor::Tensor *> inputs = {&yt_tensor, &x_tensor};
std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};
// ----------------------------------------
kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc);
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), NULL, pool_desc, nullptr);

kernel->Init();

auto time_start = mindspore::lite::GetTimeUs();
kernel->Run();
auto time_end = mindspore::lite::GetTimeUs();
printf("single thread running time : %ld us\n", time_end - time_start);

std::string output_path = "./test_data/pooling/avgpoolgradfp32_s3_dx_3_28_28_3.bin";
auto res = lite::CompareRelativeOutput(out_data, output_path);

EXPECT_EQ(res, 0);

delete[] x_data;
delete[] yt_data;
// delete[] out_data;
// delete conv_param;
x_tensor.SetData(nullptr);
yt_tensor.SetData(nullptr);
out_tensor.SetData(nullptr);
delete kernel;
MS_LOG(INFO) << "AvgPoolGradStride3Fp32 passed";
}

TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
// prepare stage
auto pooling_param = new PoolingParameter();

@ -169,26 +364,25 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
pooling_param->output_batch_ * pooling_param->output_channel_ * pooling_param->output_h_ * pooling_param->output_w_;

size_t input_size;
std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_i_1_28_28_3.bin";
auto ill_data = reinterpret_cast<int64_t *>(mindspore::lite::ReadFile(i_path.c_str(), &input_size));
auto i_data = new int[output_data_size];
for (uint32_t i = 0; i < output_data_size; i++) {
i_data[i] = static_cast<int>(ill_data[i]);
}
std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_x_1_28_28_3.bin";
auto in_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(i_path.c_str(), &input_size));

std::string dy_path = "./test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin";
auto dy_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dy_path.c_str(), &input_size));

std::string dx_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin";
auto dx_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(dx_path.c_str(), &input_size));

auto output_data = new float[output_data_size];
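// Note the new MaxPoolingGrad signature below: it takes the float tensors
// loaded above (x, dx, dy), apparently recomputing the max locations from
// the forward data; the old int64 index map (maxpoolgradfp32_1_i_*.bin)
// is no longer an input.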
// warm up loop
for (int i = 0; i < 3; i++) {
MaxPoolingGrad(dy_data, i_data, output_data, pooling_param);
MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param);
}

int loop_count = 100;
auto time_start = mindspore::lite::GetTimeUs();
for (int i = 0; i < loop_count; i++) {
MaxPoolingGrad(dy_data, i_data, output_data, pooling_param);
MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param);
}
auto time_end = mindspore::lite::GetTimeUs();
auto cost = time_end - time_start;

@ -200,11 +394,13 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) {
std::cout << output_data[i] << " ,";
}
std::cout << std::endl;
std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin";
std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_1_28_28_3.bin";
lite::CompareOutput(output_data, output_path);

// delete input_data;
delete[] in_data;
delete pooling_param;
delete[] dy_data;
delete[] dx_data;
delete[] output_data;
MS_LOG(INFO) << "TestMaxPoolingGradFp32 passed";
}

@ -326,4 +522,216 @@ TEST_F(TestPoolingGradFp32, MaxPoolingKernelGradFp32) {
MS_LOG(INFO) << "TestMaxPoolingKernelGradFp32 passed";
}
#endif // if 0 before MaxPoolingKernelGradFp32

TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
// prepare stage
// the incoming gradient dy has the forward output's size; the computed dx matches the original size of x
auto maxpool = new PoolingParameter();
InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->pool_mode_ = PoolMode_MaxPool;
maxpool->input_batch_ = 3;
maxpool->output_batch_ = 3;

size_t input_size;
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size));
std::vector<int> dim_x({3, 28, 28, 3});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dx_3_28_28_3.bin", &input_size));
std::vector<int> dim_y({3, 28, 28, 3});
lite::tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.SetData(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dy_3_28_28_3.bin", &input_size));
lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.SetData(yt_data);

auto out_data = new float[y_data_size];
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.SetData(out_data);

std::vector<lite::tensor::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
std::vector<lite::tensor::Tensor *> maxpool_outputs = {&out_tensor};
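// Going by the tensor names, the max-pool grad kernel takes three inputs
// (forward input x, forward output y, incoming gradient dy) and produces
// one output dx shaped like x.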
// ----------------------------------------
kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), NULL,
maxpool_desc, nullptr);

kernel->Init();

auto time_start = mindspore::lite::GetTimeUs();
kernel->Run();
auto time_end = mindspore::lite::GetTimeUs();
printf("single thread running time : %ld us\n", time_end - time_start);

std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_3_28_28_3.bin";
auto res = lite::CompareRelativeOutput(out_data, output_path);

EXPECT_EQ(res, 0);

delete[] x_data;
delete[] y_data;
delete[] yt_data;
// delete[] out_data;
// delete conv_param;
x_tensor.SetData(nullptr);
y_tensor.SetData(nullptr);
yt_tensor.SetData(nullptr);
out_tensor.SetData(nullptr);
delete kernel;
MS_LOG(INFO) << "MaxPoolGradBatchFp32 passed";
}

TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
// prepare stage
// the incoming gradient dy has the forward output's size; the computed dx matches the original size of x
auto maxpool = new PoolingParameter();
InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->input_channel_ = 3;
maxpool->pool_mode_ = PoolMode_MaxPool;
maxpool->input_batch_ = 3;
maxpool->output_batch_ = 3;
maxpool->output_h_ = 14;
maxpool->output_w_ = 14;
maxpool->stride_h_ = 2;
maxpool->stride_w_ = 2;

size_t input_size;
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size));
std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dx_3_28_28_3.bin", &input_size));
std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_});
lite::tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.SetData(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size));
lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.SetData(yt_data);

auto out_data = new float[y_data_size];
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.SetData(out_data);

std::vector<lite::tensor::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
std::vector<lite::tensor::Tensor *> maxpool_outputs = {&out_tensor};
// ----------------------------------------
kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), NULL,
maxpool_desc, nullptr);

kernel->Init();

auto time_start = mindspore::lite::GetTimeUs();
kernel->Run();
auto time_end = mindspore::lite::GetTimeUs();
printf("single thread running time : %ld us\n", time_end - time_start);

std::string output_path = "./test_data/pooling/maxpoolgradfp32_s2_xgrad_3_28_28_3.bin";
auto res = lite::CompareRelativeOutput(out_data, output_path);

EXPECT_EQ(res, 0);

delete[] x_data;
delete[] y_data;
delete[] yt_data;
// delete[] out_data;
// delete conv_param;
x_tensor.SetData(nullptr);
y_tensor.SetData(nullptr);
yt_tensor.SetData(nullptr);
out_tensor.SetData(nullptr);
delete kernel;
MS_LOG(INFO) << "MaxPoolGradStride2Fp32 passed";
}

TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
// prepare stage
// the incoming gradient dy has the forward output's size; the computed dx matches the original size of x
auto maxpool = new PoolingParameter();
InitPoolingParamFP32(maxpool);
maxpool->output_channel_ = 3;
maxpool->input_channel_ = 3;
maxpool->pool_mode_ = PoolMode_MaxPool;
maxpool->input_batch_ = 3;
maxpool->output_batch_ = 3;
maxpool->output_h_ = 10;
maxpool->output_w_ = 10;
maxpool->stride_h_ = 3;
maxpool->stride_w_ = 3;

size_t input_size;
size_t y_data_size = maxpool->output_batch_ * maxpool->output_channel_ * maxpool->input_h_ * maxpool->input_w_;

auto x_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size));
std::vector<int> dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_});
lite::tensor::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x);
x_tensor.SetData(x_data);

auto y_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dx_3_28_28_3.bin", &input_size));
std::vector<int> dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_});
lite::tensor::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y);
y_tensor.SetData(y_data);

auto yt_data = reinterpret_cast<float *>(
mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size));
lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y);
yt_tensor.SetData(yt_data);

auto out_data = new float[y_data_size];
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, dim_x);
out_tensor.SetData(out_data);

std::vector<lite::tensor::Tensor *> maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor};
std::vector<lite::tensor::Tensor *> maxpool_outputs = {&out_tensor};
// ----------------------------------------
kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad};
auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc);
auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), NULL,
maxpool_desc, nullptr);

kernel->Init();

auto time_start = mindspore::lite::GetTimeUs();
kernel->Run();
auto time_end = mindspore::lite::GetTimeUs();
printf("single thread running time : %ld us\n", time_end - time_start);

std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin";
auto res = lite::CompareRelativeOutput(out_data, output_path);

EXPECT_EQ(res, 0);

delete[] x_data;
delete[] y_data;
delete[] yt_data;
// delete[] out_data;
// delete conv_param;
x_tensor.SetData(nullptr);
y_tensor.SetData(nullptr);
yt_tensor.SetData(nullptr);
out_tensor.SetData(nullptr);
delete kernel;
MS_LOG(INFO) << "MaxPoolGradStride3Fp32 passed";
}

} // namespace mindspore

@ -40,7 +40,7 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
y_tensor.SetData(input_data);

std::string label_path = "./test_data/operators/sce_fp32_1_l_6.bin";
auto ll_labels = reinterpret_cast<int64 *>(mindspore::lite::ReadFile(label_path.c_str(), &input_size));
auto ll_labels = reinterpret_cast<int64_t *>(mindspore::lite::ReadFile(label_path.c_str(), &input_size));
auto labels = new int[6];
for (int i = 0; i < 6; i++) labels[i] = static_cast<int>(ll_labels[i]);
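// The label file stores int64 values, while the kernel consumes plain int
// labels, hence the element-wise cast above.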

@ -57,7 +57,7 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
auto grad = new float[24];
lite::tensor::Tensor grad_tensor(TypeId::kNumberTypeFloat32, dim_y);
grad_tensor.SetData(grad);
std::vector<lite::tensor::Tensor *> outputs = {&grad_tensor, &loss_tensor};
std::vector<lite::tensor::Tensor *> outputs = {&loss_tensor, &grad_tensor};
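// The reordering above matches the kernel's output layout: loss is written
// to outputs[0] and the gradient to outputs[1].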

kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftmaxCrossEntropy};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);

Binary file not shown.
(The remaining additions are binary test-data files; their raw float contents are not human-readable and are omitted here.)
Some files were not shown because too many files have changed in this diff.