!29667 [MS][LITE] sync grad op codecheck

Merge pull request !29667 from zhengjun10/codecheck
This commit is contained in:
i-robot 2022-02-08 11:22:27 +00:00 committed by Gitee
commit 01363adce2
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
46 changed files with 136 additions and 148 deletions

View File

@ -20,6 +20,7 @@
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "include/api/data_type.h"
#include "include/api/dual_abi_helper.h"
@ -28,6 +29,8 @@ class Model;
class ModelImpl;
class CallbackImpl;
using GraphPoint = std::pair<int, float>;
struct TrainCallBackData {
TrainCallBackData(bool train_mode, int epoch, int step, Model *model): train_mode_(train_mode), epoch_(epoch),
step_(step), model_(model) {}

View File

@ -21,8 +21,6 @@
#include <utility>
#include "include/api/callback/callback.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
class LossMonitor: public TrainCallBack {

View File

@ -24,8 +24,6 @@
#include "include/api/callback/callback.h"
#include "include/api/metrics/accuracy.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
class TrainAccuracy: public TrainCallBack {

View File

@ -19,7 +19,7 @@
#include "nnacl/op_base.h"
typedef struct GatherNdParameter {
typedef struct {
// Primitive parameter
OpParameter op_parameter_;
} GatherNdParameter;

View File

@ -18,7 +18,7 @@
#include "nnacl/op_base.h"
typedef struct BinaryCrossEntropyParameter {
typedef struct {
OpParameter op_parameter_;
int reduction;
} BinaryCrossEntropyParameter;

View File

@ -18,7 +18,7 @@
#include "nnacl/op_base.h"
typedef struct BinaryCrossEntropyGradParameter {
typedef struct {
OpParameter op_parameter_;
int reduction;
} BinaryCrossEntropyGradParameter;

View File

@ -19,7 +19,7 @@
#include "nnacl/op_base.h"
typedef struct DropoutParameter {
typedef struct {
OpParameter op_parameter_;
float ratio_;
} DropoutParameter;

View File

@ -240,7 +240,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size
}
for (; ri < row; ri++) {
for (int i = 0; i < col; i++) {
for (size_t i = 0; i < col; i++) {
dst_r[i * C12NUM] = src_r[i];
}
src_r += lead;
@ -457,7 +457,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
dst_r += C8NUM * col;
}
for (; ri < row; ri++) {
for (int i = 0; i < col; i++) {
for (size_t i = 0; i < col; i++) {
dst_r[i * C8NUM] = src_r[i];
}
src_r += lead;

View File

@ -18,7 +18,7 @@
#include "nnacl/op_base.h"
typedef struct LayerNormGradParameter {
typedef struct {
OpParameter op_parameter_;
int begin_norm_axis_;
int begin_params_axis_;

View File

@ -19,20 +19,20 @@
#include "nnacl/op_base.h"
typedef struct ApplyMomentumParameter {
typedef struct {
OpParameter op_parameter_;
bool use_nesterov_;
float grad_scale_;
} ApplyMomentumParameter;
typedef struct SgdParameter {
typedef struct {
OpParameter op_parameter_;
float dampening_;
bool use_nesterov_;
float weight_decay_;
} SgdParameter;
typedef struct AdamParameter {
typedef struct {
OpParameter op_parameter_;
bool use_nesterov_;
} AdamParameter;

View File

@ -19,7 +19,7 @@
#include "nnacl/op_base.h"
typedef struct SmoothL1LossParameter {
typedef struct {
OpParameter op_parameter_;
float beta_;
} SmoothL1LossParameter;

View File

@ -185,7 +185,7 @@ float NetRunner::CalculateAccuracy(int max_tests) {
Rescaler rescale(kScalePoint);
loop_->Eval(test_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale});
loop_->Eval(test_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale}, nullptr, INT_MAX);
std::cout << "Accuracy is " << acc_metrics_->Eval() << std::endl;
return 0.0;
@ -222,12 +222,13 @@ int NetRunner::TrainLoop() {
Measurement measure(epochs_);
if (virtual_batch_ > 0) {
loop_->Train(epochs_, train_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &measure});
loop_->Train(epochs_, train_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &measure},
nullptr);
} else {
struct mindspore::lite::StepLRLambda step_lr_lambda(1, kGammaFactor);
mindspore::lite::LRScheduler step_lr_sched(mindspore::lite::StepLRLambda, static_cast<void *>(&step_lr_lambda), 1);
loop_->Train(epochs_, train_ds_.get(),
std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &step_lr_sched, &measure});
std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &step_lr_sched, &measure}, nullptr);
}
return 0;

View File

@ -21,8 +21,6 @@
#include <unordered_map>
#include "include/train/train_loop.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
namespace lite {

View File

@ -15,24 +15,22 @@
*/
#ifndef MINDSPORE_LITE_INCLUDE_TRAIN_CKPT_SAVER_H_
#define MINDSPORE_LITE_INCLUDE_TRAIN_CKPT_SAVER_H_
#include <stdio.h>
#include <cstdio>
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/train/train_loop.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
namespace lite {
class CkptSaver : public session::TrainLoopCallBack {
public:
CkptSaver(int save_every_n, const std::string &filename_prefix)
: save_every_n_(save_every_n), filename_prefix_(filename_prefix) {}
CkptSaver(size_t save_every_n, std::string filename_prefix)
: save_every_n_(save_every_n), filename_prefix_(std::move(filename_prefix)) {}
~CkptSaver() = default;
~CkptSaver() override = default;
int EpochEnd(const session::TrainLoopCallBackData &cb_data) override {
if ((cb_data.epoch_ + 1) % save_every_n_ == 0) {
@ -44,7 +42,7 @@ class CkptSaver : public session::TrainLoopCallBack {
}
private:
int save_every_n_;
size_t save_every_n_;
std::string filename_prefix_;
};

View File

@ -24,8 +24,6 @@
#include "include/train/train_loop.h"
#include "include/train/accuracy_metrics.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
namespace lite {

View File

@ -22,8 +22,6 @@
#include <unordered_map>
#include "include/train/train_loop_callback.h"
using GraphPoint = std::pair<int, float>;
namespace mindspore {
namespace lite {

View File

@ -36,13 +36,7 @@ class MixPrecisionCfg {
this->keep_batchnorm_fp32_ = rhs.keep_batchnorm_fp32_;
this->num_of_not_nan_iter_th_ = rhs.num_of_not_nan_iter_th_;
}
MixPrecisionCfg &operator=(MixPrecisionCfg const &rhs) {
this->dynamic_loss_scale_ = rhs.dynamic_loss_scale_;
this->loss_scale_ = rhs.loss_scale_;
this->keep_batchnorm_fp32_ = rhs.keep_batchnorm_fp32_;
this->num_of_not_nan_iter_th_ = rhs.num_of_not_nan_iter_th_;
return *this;
}
MixPrecisionCfg &operator=(MixPrecisionCfg const &rhs) = default;
bool dynamic_loss_scale_ = false; /**< Enable/disable dynamic loss scale during mix precision training */
float loss_scale_; /**< Initial loss scale factor */
bool keep_batchnorm_fp32_ = true; /**< Keep batch norm in FP32 while training */
@ -58,12 +52,7 @@ class TrainCfg {
this->mix_precision_cfg_ = rhs.mix_precision_cfg_;
this->accumulate_gradients_ = rhs.accumulate_gradients_;
}
TrainCfg &operator=(const TrainCfg &rhs) {
this->loss_name_ = rhs.loss_name_;
this->mix_precision_cfg_ = rhs.mix_precision_cfg_;
this->accumulate_gradients_ = rhs.accumulate_gradients_;
return *this;
}
TrainCfg &operator=(const TrainCfg &rhs) = default;
std::vector<std::string> loss_name_ = {"loss_fct"}; /**< Set part of the name that identify a loss kernel */
MixPrecisionCfg mix_precision_cfg_; /**< Mix precision configuration */
bool accumulate_gradients_ = false; /**< If true, gradients are accumulated and can be read by GetGradients */

View File

@ -18,7 +18,6 @@
#include <vector>
#include <string>
#include <tuple>
#include <climits>
#include <unordered_map>
#include "include/train/train_loop_callback.h"
#include "include/train/metrics.h"
@ -87,7 +86,7 @@ class TrainLoop {
///
/// \return 0 on success or -1 in case of error
virtual int Train(int epochs, mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs,
LoadDataFunc load_func = nullptr) = 0;
LoadDataFunc load_func) = 0;
/// \brief Performs loop over all data in Eval Mode
///
@ -97,8 +96,8 @@ class TrainLoop {
/// \param[in] max_steps (with default = INT_MAX the method iterates all dataset)
///
/// \return 0 on success or -1 in case of error
virtual int Eval(mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs,
LoadDataFunc load_func = nullptr, int max_steps = INT_MAX) = 0;
virtual int Eval(mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs, LoadDataFunc load_func,
int max_steps) = 0;
};
} // namespace session
} // namespace mindspore

View File

@ -18,7 +18,9 @@
#include <vector>
#include <string>
#include <tuple>
#include <utility>
#include <unordered_map>
#include "include/api/callback/callback.h"
namespace mindspore {
namespace session {

View File

@ -58,7 +58,7 @@ Status Model::Train(int epochs, std::shared_ptr<dataset::Dataset> ds, std::vecto
return status;
}
auto ret = loop->Train(epochs, ds.get(), cbs);
auto ret = loop->Train(epochs, ds.get(), cbs, nullptr);
clearVectorOfPointers(&adapter_metrics);
clearVectorOfPointers(&adapter_cbs);
@ -98,7 +98,7 @@ Status Model::Evaluate(std::shared_ptr<dataset::Dataset> ds, std::vector<TrainCa
return status;
}
auto ret = loop->Eval(ds.get(), cbs);
auto ret = loop->Eval(ds.get(), cbs, nullptr, INT_MAX);
clearVectorOfPointers(&adapter_metrics);
clearVectorOfPointers(&adapter_cbs);

View File

@ -15,7 +15,7 @@
*/
#include "src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.h"
#include <math.h>
#include <cmath>
#include <algorithm>
#include <vector>
#include <string>

View File

@ -99,7 +99,6 @@ int AdamCPUKernel::Execute(int task_id) {
}
int AdamRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
CHECK_NULL_RETURN(cdata);
auto adam_kernel = reinterpret_cast<AdamCPUKernel *>(cdata);
CHECK_NULL_RETURN(adam_kernel);
auto error_code = RET_OK;

View File

@ -85,13 +85,13 @@ int ArithmeticGradCPUKernel::Prepare() {
int ArithmeticGradCPUKernel::ArithmeticGradAdd(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
int dx2_size) {
if (dx1_size == dy_size) {
memcpy(dx1, dy, dy_size * sizeof(float));
memcpy(dx1, dy, static_cast<size_t>(dy_size) * sizeof(float));
} else {
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_,
arithmeticParameter_->ndim_);
}
if (dx2_size == dy_size) {
memcpy(dx2, dy, dy_size * sizeof(float));
memcpy(dx2, dy, static_cast<size_t>(dy_size) * sizeof(float));
} else {
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx2, arithmeticParameter_->in_shape1_,
arithmeticParameter_->ndim_);
@ -102,7 +102,7 @@ int ArithmeticGradCPUKernel::ArithmeticGradAdd(float *dy, int dy_size, float *dx
int ArithmeticGradCPUKernel::ArithmeticGradSub(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
int dx2_size) {
if (dx1_size == dy_size) {
memcpy(dx1, dy, dy_size * sizeof(float));
memcpy(dx1, dy, static_cast<size_t>(dy_size) * sizeof(float));
} else {
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_,
arithmeticParameter_->ndim_);
@ -263,7 +263,6 @@ int ArithmeticGradCPUKernel::Execute(int task_id) {
}
int ArithmeticGradRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
CHECK_NULL_RETURN(cdata);
auto Arithmetic_kernel = reinterpret_cast<ArithmeticGradCPUKernel *>(cdata);
CHECK_NULL_RETURN(Arithmetic_kernel);
auto error_code = Arithmetic_kernel->Execute(task_id);

View File

@ -39,8 +39,12 @@ class ArithmeticGradCPUKernel : public InnerKernel {
public:
explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
switch (type()) {
: InnerKernel(parameter, inputs, outputs, ctx),
arithmetic_grad_(nullptr),
tile_data0(nullptr),
tile_data1(nullptr),
tile_data2(nullptr) {
switch (parameter->type_) {
case PrimitiveType_MulGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape
break;

View File

@ -29,8 +29,8 @@ class ArithmeticSelfGradCPUKernel : public InnerKernel {
public:
ArithmeticSelfGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {}
~ArithmeticSelfGradCPUKernel() override {}
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_), self_grad_operation_(nullptr) {}
~ArithmeticSelfGradCPUKernel() override = default;
int Prepare() override;
int ReSize() override;
int Run() override;

View File

@ -36,7 +36,7 @@ int AssignCPUKernel::Execute(int task_id) {
CHECK_NULL_RETURN(y);
int length = in_tensors_.at(0)->ElementsNum();
int stride = UP_DIV(length, thread_count_);
int count = MSMIN(stride, length - stride * task_id);
size_t count = MSMIN(stride, length - stride * task_id);
int start = stride * task_id;

View File

@ -62,7 +62,7 @@ int BiasGradCPUKernel::Execute(int task_id) {
size_t nhw_size = 1;
size_t channels = bias_param->in_shape0_[bias_param->ndim_ - 1]; // C in NHWC
for (unsigned int i = 0; i < bias_param->ndim_ - 1; i++) {
nhw_size *= bias_param->in_shape0_[i];
nhw_size *= static_cast<size_t>(bias_param->in_shape0_[i]);
}
size_t total_size = channels * nhw_size;

View File

@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
#include <cmath>
#include <algorithm>
#include <vector>
#include <string>
@ -121,6 +120,9 @@ int BNGradCPUKernel::Execute(int task_id) {
case 2:
std::fill(dscale, dscale + channels, 0.f);
break;
default:
MS_LOG(ERROR) << "Exceeds the maximum thread";
return RET_ERROR;
}
}
if (thread_num == 1) {
@ -137,6 +139,9 @@ int BNGradCPUKernel::Execute(int task_id) {
scale, count, total, channels, dx + task_id * stride * channels);
break;
}
default:
MS_LOG(ERROR) << "Unsupported stage";
return RET_ERROR;
}
return RET_OK;

View File

@ -54,22 +54,18 @@ int ConvolutionTrainCPUKernel::ReSize() {
const int n = conv_param_->output_channel_ * conv_param_->group_;
const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
do_img2col_ = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
(conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
(conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) &&
(conv_param_->stride_h_ == 1) && (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1)
? false
: true;
do_img2col_ = !((conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
(conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
(conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) && (conv_param_->stride_h_ == 1) &&
(conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1));
do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
(conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
(conv_param_->dilation_w_ == 1)
? true
: false;
(conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
(conv_param_->dilation_w_ == 1);
ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
set_workspace_size((ws_size_ + mat_alloc) * sizeof(float));
set_workspace_size(static_cast<size_t>(ws_size_ + mat_alloc) * sizeof(float));
return RET_OK;
}
@ -139,7 +135,7 @@ int ConvolutionTrainCPUKernel::Execute(int task_id) {
}
} else {
mat_b = w_addr;
const size_t in_plane_size = in_ch * in_h * in_w;
const int in_plane_size = in_ch * in_h * in_w;
for (int i = 0; i < batch; ++i) {
im = x_addr + i * in_plane_size;
for (int ci = 0; ci < m; ci += chunk_) {

View File

@ -61,26 +61,21 @@ int ConvolutionGradFilterCPUKernel::ReSize() {
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
NNACL_CHECK_ZERO_RETURN_ERR(conv_param->group_);
do_img2col_ = (conv_param->kernel_h_ == 1) && (conv_param->kernel_w_ == 1) && (conv_param->pad_d_ == 0) &&
(conv_param->pad_u_ == 0) && (conv_param->pad_l_ == 0) && (conv_param->pad_r_ == 0) &&
(conv_param->dilation_h_ == 1) && (conv_param->dilation_w_ == 1) && (conv_param->stride_h_ == 1) &&
(conv_param->stride_w_ == 1) && (conv_param->group_ == 1)
? false
: true;
do_img2col_ = !((conv_param->kernel_h_ == 1) && (conv_param->kernel_w_ == 1) && (conv_param->pad_d_ == 0) &&
(conv_param->pad_u_ == 0) && (conv_param->pad_l_ == 0) && (conv_param->pad_r_ == 0) &&
(conv_param->dilation_h_ == 1) && (conv_param->dilation_w_ == 1) && (conv_param->stride_h_ == 1) &&
(conv_param->stride_w_ == 1) && (conv_param->group_ == 1));
do_dw_ = (conv_param->output_channel_ == conv_param->group_) &&
(conv_param->input_channel_ == conv_param->output_channel_) && (conv_param->dilation_h_ == 1) &&
(conv_param->dilation_w_ == 1)
? true
: false;
(conv_param->input_channel_ == conv_param->output_channel_) && (conv_param->dilation_h_ == 1) &&
(conv_param->dilation_w_ == 1);
ws_size_ = chunk_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param->group_;
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / static_cast<size_t>(conv_param->group_);
int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_;
int k = conv_param->output_channel_ / conv_param->group_;
int thread_num = op_parameter_->thread_num_;
auto thread_num = static_cast<size_t>(op_parameter_->thread_num_);
mat_alloc_ = MatSizeTotal(k, n, chunk_, 0);
set_workspace_size((ws_size_ + mat_alloc_ + (k * n)) * thread_num * sizeof(float));
set_workspace_size((ws_size_ + mat_alloc_ + static_cast<size_t>(k * n)) * thread_num * sizeof(float));
return RET_OK;
}
@ -97,27 +92,24 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
auto dy_addr = reinterpret_cast<float *>(input_dy->MutableData());
auto dw_addr = reinterpret_cast<float *>(out_dw->MutableData());
int nweights = out_dw->ElementsNum();
int in_ch = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
int k_h = conv_param->kernel_h_;
int k_w = conv_param->kernel_w_;
int batch = conv_param->output_batch_;
int out_ch = conv_param->output_channel_;
int groups = conv_param->group_;
int out_h = conv_param->output_h_;
int out_w = conv_param->output_w_;
int m = out_h * out_w;
int m = conv_param->output_h_ * conv_param->output_w_;
int n = k_h * k_w * in_ch / groups;
int k = out_ch / groups;
int thread_num = op_parameter_->thread_num_;
float *workspace_temp = reinterpret_cast<float *>(workspace());
float *mat_workspace = workspace_temp + ws_size_ * thread_num + task_id * (mat_alloc_ + k * n);
auto *workspace_temp = reinterpret_cast<float *>(workspace());
float *mat_workspace =
workspace_temp + static_cast<int>(ws_size_) * thread_num + task_id * (static_cast<int>(mat_alloc_) + k * n);
float *mat_tmp = mat_workspace + mat_alloc_;
int stride = UP_DIV(batch, thread_num);
int count = MSMIN(stride, batch - stride * task_id);
int stride = UP_DIV(conv_param->output_batch_, thread_num);
int count = MSMIN(stride, conv_param->output_batch_ - stride * task_id);
count = (count < 0) ? 0 : count;
int start = stride * task_id;
int end = start + count;
@ -140,7 +132,7 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
end = start + count;
const int kernel_spatial = k_h * k_w;
for (int i = 0; i < batch; ++i) {
for (int i = 0; i < conv_param->output_batch_; ++i) {
for (int ci = 0; ci < m; ci += chunk_) {
real_chunk = MSMIN(m - ci, chunk_);
mat_b = workspace_temp + task_id * ws_size_;
@ -148,7 +140,7 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
RollingIm2ColPackDwUnitFp32(im, conv_param, mat_b, real_chunk, ci);
for (int j = start; j < end; ++j) {
mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
mat_c = dw_addr + j * nweights / groups;
mat_c = dw_addr + j * out_dw->ElementsNum() / groups;
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b + (j * kernel_spatial), n * groups, 1, mat_c, n,
mat_workspace);
}
@ -161,8 +153,8 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
for (int j = 0; j < groups; ++j) {
real_chunk = MSMIN(m - ci, chunk_);
mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
mat_b = workspace_temp + task_id * ws_size_;
mat_c = dw_addr + j * nweights / groups;
mat_b = workspace_temp + task_id * static_cast<int>(ws_size_);
mat_c = dw_addr + j * out_dw->ElementsNum() / groups;
im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups);
RollingIm2ColPackUnitFp32(im, conv_param, mat_b, real_chunk, ci);
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b, n, 0, mat_tmp, n, mat_workspace);
@ -172,17 +164,16 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
}
}
} else {
NNACL_CHECK_ZERO_RETURN_ERR(out_w * conv_param->stride_h_);
NNACL_CHECK_ZERO_RETURN_ERR(out_w * conv_param->stride_w_);
NNACL_CHECK_ZERO_RETURN_ERR(conv_param->output_w_);
mat_c = dw_addr;
const size_t in_plane_size = in_ch * in_h * in_w;
auto in_plane_size = in_ch * in_h * in_w;
for (int i = start; i < end; ++i) {
for (int ci = 0; ci < m; ci += chunk_) {
real_chunk = MSMIN(m - ci, chunk_);
mat_a = dy_addr + i * m * k + ci * out_ch;
im = x_addr + i * in_plane_size;
int input_h = ci / out_w * conv_param->stride_h_;
int input_w = ci % out_w * conv_param->stride_w_;
int input_h = ci / conv_param->output_w_ * conv_param->stride_h_;
int input_w = ci % conv_param->output_w_ * conv_param->stride_w_;
int offset = (input_h * in_w + input_w) * in_ch;
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, im + offset, n, 0, mat_tmp, n, mat_workspace);
std::unique_lock<std::mutex> merge_lock(lock_);

View File

@ -54,7 +54,7 @@ int ConvolutionGradInputCPUKernel::ReSize() {
int n = conv_param->kernel_w_ * conv_param->kernel_h_ * conv_param->input_channel_ / conv_param->group_;
int k = conv_param->output_channel_ / conv_param->group_;
int thread_num = op_parameter_->thread_num_;
auto thread_num = static_cast<size_t>(op_parameter_->thread_num_);
mat_alloc_ = MatSizeTotal(chunk_, n, k, 0);
set_workspace_size((ws_size_ + mat_alloc_) * sizeof(float) * thread_num);
@ -102,7 +102,8 @@ int ConvolutionGradInputCPUKernel::Execute(int task_id) {
int m = out_h * out_w;
int n = k_w * k_h * in_ch / groups;
int k = out_ch / groups;
float *workspace_temp = reinterpret_cast<float *>(workspace()) + task_id * (mat_alloc_ + ws_size_);
float *workspace_temp =
reinterpret_cast<float *>(workspace()) + static_cast<size_t>(task_id) * (mat_alloc_ + ws_size_);
float *mat_workspace = workspace_temp + ws_size_;
int stride = UP_DIV(batch, thread_num);
int count = MSMIN(stride, batch - stride * task_id);
@ -169,10 +170,10 @@ int ConvolutionGradInputRun(void *cdata, int task_id, float lhs_scale, float rhs
int ConvolutionGradInputCPUKernel::Run() {
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
int batch = conv_param->output_batch_;
int in_ch = conv_param->input_channel_;
int in_h = conv_param->input_h_;
int in_w = conv_param->input_w_;
auto batch = static_cast<size_t>(conv_param->output_batch_);
auto in_ch = static_cast<size_t>(conv_param->input_channel_);
auto in_h = static_cast<size_t>(conv_param->input_h_);
auto in_w = static_cast<size_t>(conv_param->input_w_);
auto *out_dx = out_tensors_.at(0);
auto dx_addr = reinterpret_cast<float *>(out_dx->MutableData());
memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w);

View File

@ -80,7 +80,7 @@ int DropoutCPUKernel::Execute(int task_id) {
std::bernoulli_distribution distribution(param->ratio_);
for (int i = start; i < end; i++) {
mask[i] = distribution(generator);
mask[i] = static_cast<float>(distribution(generator));
output_ptr[i] = input_ptr[i] * mask[i] * scale_;
}
}

View File

@ -34,7 +34,7 @@ class DropoutGradCPUKernel : public InnerKernel {
int Execute(int task_id);
private:
float scale_;
float scale_ = 1.0f;
int thread_count_ = 1;
};

View File

@ -73,13 +73,13 @@ int ResizeGradCPUKernel::Execute(int task_id) {
CHECK_NULL_RETURN(param);
auto batch_size = in_tensors_.at(0)->Batch();
auto channel = in_tensors_.at(0)->Channel();
int error_code = NNACL_OK;
int error_code;
if (param->method == static_cast<int>(schema::ResizeMethod_NEAREST)) {
error_code = ResizeNearestNeighborGrad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
} else {
error_code = ResizeBiLinearGrad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
}
if (error_code != NNACL_OK) {
if (error_code != static_cast<int>(NNACL_OK)) {
MS_LOG(ERROR) << "Resize fp32 grad failed.";
return error_code;
}

View File

@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits.h"
#include <cmath>
#include "src/kernel_registry.h"
#include "include/errorcode.h"

View File

@ -15,7 +15,6 @@
*/
#include "src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits_grad.h"
#include <cmath>
#include "src/kernel_registry.h"
#include "include/errorcode.h"

View File

@ -30,7 +30,7 @@ constexpr static int kOutputIdx = 0;
int SmoothL1LossCPUKernel::ReSize() { return RET_OK; }
int SmoothL1LossCPUKernel::Execute(int task_id) {
int SmoothL1LossCPUKernel::Execute(size_t task_id) {
SmoothL1LossParameter *smooth_l1_loss_param = reinterpret_cast<SmoothL1LossParameter *>(op_parameter_);
CHECK_NULL_RETURN(smooth_l1_loss_param);
auto predict = reinterpret_cast<float *>(in_tensors_.at(kPredictIdx)->MutableData());
@ -42,7 +42,7 @@ int SmoothL1LossCPUKernel::Execute(int task_id) {
const size_t length = in_tensors_.at(kPredictIdx)->ElementsNum();
size_t stride = UP_DIV(length, thread_count_);
int count = MSMIN(stride, length - stride * task_id);
size_t count = MSMIN(stride, length - stride * task_id);
size_t start = stride * task_id;
size_t end = start + count;

View File

@ -33,11 +33,11 @@ class SmoothL1LossCPUKernel : public InnerKernel {
int Prepare() override;
int ReSize() override;
int Run() override;
int Execute(int task_id);
int Execute(size_t task_id);
private:
SmoothL1LossParameter *smooth_l1_param_;
int thread_count_ = 1;
size_t thread_count_ = 1;
};
} // namespace mindspore::kernel

View File

@ -33,7 +33,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
float *output2) const {
float eps = 1e-6;
if (grads != nullptr) {
for (int i = 0; i < param_->batch_size_; ++i) {
for (size_t i = 0; i < static_cast<size_t>(param_->batch_size_); ++i) {
float loss = 0.f;
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
float logit =
@ -45,7 +45,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
output2[i] = loss;
}
} else {
for (int i = 0; i < param_->batch_size_; ++i) {
for (size_t i = 0; i < static_cast<size_t>(param_->batch_size_); ++i) {
float loss = 0.f;
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
float logit =
@ -123,7 +123,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() {
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
set_workspace_size((data_size + dims.at(0)) * sizeof(float));
set_workspace_size((data_size + static_cast<size_t>(dims.at(0))) * sizeof(float));
sm_params_.n_dim_ = 2;
sm_params_.element_size_ = data_size;
sm_params_.axis_ = 1;

View File

@ -33,7 +33,7 @@ class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
: LossKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
}
~SoftmaxCrossEntropyWithLogitsCPUKernel() override {}
~SoftmaxCrossEntropyWithLogitsCPUKernel() override = default;
void ForwardPostExecute(const float *labels, const float *logits, float *output1, float *output2) const;

View File

@ -49,9 +49,9 @@ int SoftmaxGradCPUKernel::Prepare() {
inner_size_ = 1;
for (size_t i = axis + 1; i < in_dims; i++) {
inner_size_ *= in_shape.at(i);
inner_size_ *= static_cast<size_t>(in_shape.at(i));
}
set_workspace_size(inner_size_ * (1 + in_shape.at(axis)) * sizeof(float));
set_workspace_size(inner_size_ * (1 + static_cast<size_t>(in_shape.at(axis))) * sizeof(float));
return RET_OK;
}

View File

@ -37,7 +37,7 @@ class SoftmaxGradCPUKernel : public InnerKernel {
private:
SoftmaxParameter *param;
size_t inner_size_;
size_t inner_size_ = 0;
};
} // namespace mindspore::kernel

View File

@ -34,7 +34,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *
float *output) const {
float total_loss = 0;
MS_CHECK_GT(param->batch_size_, 0, RET_ERROR);
for (int i = 0; i < param->batch_size_; ++i) {
for (size_t i = 0; i < static_cast<size_t>(param->batch_size_); ++i) {
if (labels[i] < 0) {
MS_LOG(ERROR) << "label value must >= 0";
return RET_ERROR;
@ -91,7 +91,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
float *losses = static_cast<float *>(workspace());
CHECK_NULL_RETURN(losses);
float *sum_data = losses + data_size;
int length = sm_params_.input_shape_[sm_params_.axis_];
int length = sm_params_->input_shape_[sm_params_->axis_];
int stride = UP_DIV(outter_size_, threads_);
int count = MSMIN(stride, outter_size_ - stride * task_id);
if (count <= 0) return RET_OK;
@ -108,6 +108,9 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
} else {
return ForwardPostExecute(labels, losses, out);
}
default:
MS_LOG(ERROR) << "Unsupported stage";
return RET_ERROR;
}
return RET_OK;
}
@ -125,9 +128,9 @@ int SparseSoftmaxCrossEntropyWithLogitsRun(void *cdata, int task_id, float lhs_s
}
int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
int axis = sm_params_.axis_;
int n_dim = sm_params_.n_dim_;
const int *input_shape = sm_params_.input_shape_;
int axis = sm_params_->axis_;
int n_dim = sm_params_->n_dim_;
const int *input_shape = sm_params_->input_shape_;
int inner_size = 1;
int outter_size = 1;
CHECK_NULL_RETURN(in_tensors_.at(0));
@ -136,7 +139,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
CHECK_NULL_RETURN(losses);
float *sum_data = losses + data_size;
std::fill(losses, losses + data_size, 0.f);
std::fill(sum_data, sum_data + sm_params_.input_shape_[0], 0.f);
std::fill(sum_data, sum_data + sm_params_->input_shape_[0], 0.f);
for (int i = 0; i < axis; i++) {
outter_size *= input_shape[i];
}
@ -182,12 +185,17 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Prepare() {
return RET_ERROR;
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
set_workspace_size((data_size + dims.at(0)) * sizeof(float));
sm_params_.n_dim_ = 2;
sm_params_.element_size_ = static_cast<int>(data_size);
sm_params_.axis_ = 1;
set_workspace_size((data_size + static_cast<size_t>(dims.at(0))) * sizeof(float));
sm_params_ = new (std::nothrow) SoftmaxParameter();
if (sm_params_ == nullptr) {
MS_LOG(ERROR) << "new softmax param failed.";
return RET_ERROR;
}
sm_params_->n_dim_ = 2;
sm_params_->element_size_ = static_cast<int>(data_size);
sm_params_->axis_ = 1;
for (size_t i = 0; i < dims.size(); i++) {
sm_params_.input_shape_[i] = dims.at(i);
sm_params_->input_shape_[i] = dims.at(i);
}
return RET_OK;
}

View File

@ -34,7 +34,12 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
: LossKernel(parameter, inputs, outputs, ctx) {
param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
}
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {}
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {
if (sm_params_ != nullptr) {
delete sm_params_;
sm_params_ = nullptr;
}
}
int ForwardPostExecute(const int *labels, const float *losses, float *output) const;
int GradPostExecute(const int *labels, const float *losses, float *grads) const;
@ -46,11 +51,11 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
private:
SoftmaxCrossEntropyParameter *param;
SoftmaxParameter sm_params_;
SoftmaxParameter *sm_params_ = nullptr;
int inner_size_ = 1;
int outter_size_ = 1;
int stage_;
int threads_;
int stage_ = 0;
int threads_ = 0;
};
} // namespace mindspore::kernel

View File

@ -46,13 +46,13 @@ int UnsortedSegmentSumCPUKernel::Prepare() {
for (size_t i = 0; i < input_shape.size(); ++i) {
unit_num_ *= input_shape[i];
if (i >= segment_ids_shape.size()) {
input_dim1_ *= input_shape[i];
input_dim1_ *= static_cast<size_t>(input_shape[i]);
}
}
output_dim0_ = output_shape[0];
output_dim1_ = 1;
for (size_t j = 1; j < output_shape.size(); j++) {
output_dim1_ *= output_shape[j];
output_dim1_ *= static_cast<size_t>(output_shape[j]);
}
return RET_OK;
}

View File

@ -25,7 +25,7 @@ class LossKernel : public InnerKernel {
LossKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: InnerKernel(parameter, inputs, outputs, ctx) {}
~LossKernel() = default;
~LossKernel() override = default;
};
} // namespace mindspore::kernel