forked from mindspore-Ecosystem/mindspore
!29667 [MS][LITE] sync grad op codecheck
Merge pull request !29667 from zhengjun10/codecheck
This commit is contained in:
commit
01363adce2
|
@ -20,6 +20,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "include/api/data_type.h"
|
||||
#include "include/api/dual_abi_helper.h"
|
||||
|
||||
|
@ -28,6 +29,8 @@ class Model;
|
|||
class ModelImpl;
|
||||
class CallbackImpl;
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
struct TrainCallBackData {
|
||||
TrainCallBackData(bool train_mode, int epoch, int step, Model *model): train_mode_(train_mode), epoch_(epoch),
|
||||
step_(step), model_(model) {}
|
||||
|
|
|
@ -21,8 +21,6 @@
|
|||
#include <utility>
|
||||
#include "include/api/callback/callback.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
class LossMonitor: public TrainCallBack {
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#include "include/api/callback/callback.h"
|
||||
#include "include/api/metrics/accuracy.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
class TrainAccuracy: public TrainCallBack {
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct GatherNdParameter {
|
||||
typedef struct {
|
||||
// Primitive parameter
|
||||
OpParameter op_parameter_;
|
||||
} GatherNdParameter;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct BinaryCrossEntropyParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
int reduction;
|
||||
} BinaryCrossEntropyParameter;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct BinaryCrossEntropyGradParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
int reduction;
|
||||
} BinaryCrossEntropyGradParameter;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct DropoutParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
float ratio_;
|
||||
} DropoutParameter;
|
||||
|
|
|
@ -240,7 +240,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size
|
|||
}
|
||||
|
||||
for (; ri < row; ri++) {
|
||||
for (int i = 0; i < col; i++) {
|
||||
for (size_t i = 0; i < col; i++) {
|
||||
dst_r[i * C12NUM] = src_r[i];
|
||||
}
|
||||
src_r += lead;
|
||||
|
@ -457,7 +457,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
|
|||
dst_r += C8NUM * col;
|
||||
}
|
||||
for (; ri < row; ri++) {
|
||||
for (int i = 0; i < col; i++) {
|
||||
for (size_t i = 0; i < col; i++) {
|
||||
dst_r[i * C8NUM] = src_r[i];
|
||||
}
|
||||
src_r += lead;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct LayerNormGradParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
int begin_norm_axis_;
|
||||
int begin_params_axis_;
|
||||
|
|
|
@ -19,20 +19,20 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct ApplyMomentumParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
bool use_nesterov_;
|
||||
float grad_scale_;
|
||||
} ApplyMomentumParameter;
|
||||
|
||||
typedef struct SgdParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
float dampening_;
|
||||
bool use_nesterov_;
|
||||
float weight_decay_;
|
||||
} SgdParameter;
|
||||
|
||||
typedef struct AdamParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
bool use_nesterov_;
|
||||
} AdamParameter;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
typedef struct SmoothL1LossParameter {
|
||||
typedef struct {
|
||||
OpParameter op_parameter_;
|
||||
float beta_;
|
||||
} SmoothL1LossParameter;
|
||||
|
|
|
@ -185,7 +185,7 @@ float NetRunner::CalculateAccuracy(int max_tests) {
|
|||
|
||||
Rescaler rescale(kScalePoint);
|
||||
|
||||
loop_->Eval(test_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale});
|
||||
loop_->Eval(test_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale}, nullptr, INT_MAX);
|
||||
std::cout << "Accuracy is " << acc_metrics_->Eval() << std::endl;
|
||||
|
||||
return 0.0;
|
||||
|
@ -222,12 +222,13 @@ int NetRunner::TrainLoop() {
|
|||
Measurement measure(epochs_);
|
||||
|
||||
if (virtual_batch_ > 0) {
|
||||
loop_->Train(epochs_, train_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &measure});
|
||||
loop_->Train(epochs_, train_ds_.get(), std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &measure},
|
||||
nullptr);
|
||||
} else {
|
||||
struct mindspore::lite::StepLRLambda step_lr_lambda(1, kGammaFactor);
|
||||
mindspore::lite::LRScheduler step_lr_sched(mindspore::lite::StepLRLambda, static_cast<void *>(&step_lr_lambda), 1);
|
||||
loop_->Train(epochs_, train_ds_.get(),
|
||||
std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &step_lr_sched, &measure});
|
||||
std::vector<TrainLoopCallBack *>{&rescale, &lm, &cs, &am, &step_lr_sched, &measure}, nullptr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -21,8 +21,6 @@
|
|||
#include <unordered_map>
|
||||
#include "include/train/train_loop.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
|
|
|
@ -15,24 +15,22 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_LITE_INCLUDE_TRAIN_CKPT_SAVER_H_
|
||||
#define MINDSPORE_LITE_INCLUDE_TRAIN_CKPT_SAVER_H_
|
||||
#include <stdio.h>
|
||||
#include <cstdio>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/train/train_loop.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
class CkptSaver : public session::TrainLoopCallBack {
|
||||
public:
|
||||
CkptSaver(int save_every_n, const std::string &filename_prefix)
|
||||
: save_every_n_(save_every_n), filename_prefix_(filename_prefix) {}
|
||||
CkptSaver(size_t save_every_n, std::string filename_prefix)
|
||||
: save_every_n_(save_every_n), filename_prefix_(std::move(filename_prefix)) {}
|
||||
|
||||
~CkptSaver() = default;
|
||||
~CkptSaver() override = default;
|
||||
|
||||
int EpochEnd(const session::TrainLoopCallBackData &cb_data) override {
|
||||
if ((cb_data.epoch_ + 1) % save_every_n_ == 0) {
|
||||
|
@ -44,7 +42,7 @@ class CkptSaver : public session::TrainLoopCallBack {
|
|||
}
|
||||
|
||||
private:
|
||||
int save_every_n_;
|
||||
size_t save_every_n_;
|
||||
std::string filename_prefix_;
|
||||
};
|
||||
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#include "include/train/train_loop.h"
|
||||
#include "include/train/accuracy_metrics.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
|
|
|
@ -22,8 +22,6 @@
|
|||
#include <unordered_map>
|
||||
#include "include/train/train_loop_callback.h"
|
||||
|
||||
using GraphPoint = std::pair<int, float>;
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
|
|
|
@ -36,13 +36,7 @@ class MixPrecisionCfg {
|
|||
this->keep_batchnorm_fp32_ = rhs.keep_batchnorm_fp32_;
|
||||
this->num_of_not_nan_iter_th_ = rhs.num_of_not_nan_iter_th_;
|
||||
}
|
||||
MixPrecisionCfg &operator=(MixPrecisionCfg const &rhs) {
|
||||
this->dynamic_loss_scale_ = rhs.dynamic_loss_scale_;
|
||||
this->loss_scale_ = rhs.loss_scale_;
|
||||
this->keep_batchnorm_fp32_ = rhs.keep_batchnorm_fp32_;
|
||||
this->num_of_not_nan_iter_th_ = rhs.num_of_not_nan_iter_th_;
|
||||
return *this;
|
||||
}
|
||||
MixPrecisionCfg &operator=(MixPrecisionCfg const &rhs) = default;
|
||||
bool dynamic_loss_scale_ = false; /**< Enable/disable dynamic loss scale during mix precision training */
|
||||
float loss_scale_; /**< Initial loss scale factor */
|
||||
bool keep_batchnorm_fp32_ = true; /**< Keep batch norm in FP32 while training */
|
||||
|
@ -58,12 +52,7 @@ class TrainCfg {
|
|||
this->mix_precision_cfg_ = rhs.mix_precision_cfg_;
|
||||
this->accumulate_gradients_ = rhs.accumulate_gradients_;
|
||||
}
|
||||
TrainCfg &operator=(const TrainCfg &rhs) {
|
||||
this->loss_name_ = rhs.loss_name_;
|
||||
this->mix_precision_cfg_ = rhs.mix_precision_cfg_;
|
||||
this->accumulate_gradients_ = rhs.accumulate_gradients_;
|
||||
return *this;
|
||||
}
|
||||
TrainCfg &operator=(const TrainCfg &rhs) = default;
|
||||
std::vector<std::string> loss_name_ = {"loss_fct"}; /**< Set part of the name that identify a loss kernel */
|
||||
MixPrecisionCfg mix_precision_cfg_; /**< Mix precision configuration */
|
||||
bool accumulate_gradients_ = false; /**< If true, gradients are accumulated and can be read by GetGradients */
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <climits>
|
||||
#include <unordered_map>
|
||||
#include "include/train/train_loop_callback.h"
|
||||
#include "include/train/metrics.h"
|
||||
|
@ -87,7 +86,7 @@ class TrainLoop {
|
|||
///
|
||||
/// \return 0 on success or -1 in case of error
|
||||
virtual int Train(int epochs, mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs,
|
||||
LoadDataFunc load_func = nullptr) = 0;
|
||||
LoadDataFunc load_func) = 0;
|
||||
|
||||
/// \brief Performs loop over all data in Eval Mode
|
||||
///
|
||||
|
@ -97,8 +96,8 @@ class TrainLoop {
|
|||
/// \param[in] max_steps (with default = INT_MAX the method iterates all dataset)
|
||||
///
|
||||
/// \return 0 on success or -1 in case of error
|
||||
virtual int Eval(mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs,
|
||||
LoadDataFunc load_func = nullptr, int max_steps = INT_MAX) = 0;
|
||||
virtual int Eval(mindspore::dataset::Dataset *dataset, std::vector<TrainLoopCallBack *> cbs, LoadDataFunc load_func,
|
||||
int max_steps) = 0;
|
||||
};
|
||||
} // namespace session
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -18,7 +18,9 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/api/callback/callback.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace session {
|
||||
|
|
|
@ -58,7 +58,7 @@ Status Model::Train(int epochs, std::shared_ptr<dataset::Dataset> ds, std::vecto
|
|||
return status;
|
||||
}
|
||||
|
||||
auto ret = loop->Train(epochs, ds.get(), cbs);
|
||||
auto ret = loop->Train(epochs, ds.get(), cbs, nullptr);
|
||||
|
||||
clearVectorOfPointers(&adapter_metrics);
|
||||
clearVectorOfPointers(&adapter_cbs);
|
||||
|
@ -98,7 +98,7 @@ Status Model::Evaluate(std::shared_ptr<dataset::Dataset> ds, std::vector<TrainCa
|
|||
return status;
|
||||
}
|
||||
|
||||
auto ret = loop->Eval(ds.get(), cbs);
|
||||
auto ret = loop->Eval(ds.get(), cbs, nullptr, INT_MAX);
|
||||
|
||||
clearVectorOfPointers(&adapter_metrics);
|
||||
clearVectorOfPointers(&adapter_cbs);
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.h"
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
|
|
@ -99,7 +99,6 @@ int AdamCPUKernel::Execute(int task_id) {
|
|||
}
|
||||
|
||||
int AdamRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
CHECK_NULL_RETURN(cdata);
|
||||
auto adam_kernel = reinterpret_cast<AdamCPUKernel *>(cdata);
|
||||
CHECK_NULL_RETURN(adam_kernel);
|
||||
auto error_code = RET_OK;
|
||||
|
|
|
@ -85,13 +85,13 @@ int ArithmeticGradCPUKernel::Prepare() {
|
|||
int ArithmeticGradCPUKernel::ArithmeticGradAdd(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
|
||||
int dx2_size) {
|
||||
if (dx1_size == dy_size) {
|
||||
memcpy(dx1, dy, dy_size * sizeof(float));
|
||||
memcpy(dx1, dy, static_cast<size_t>(dy_size) * sizeof(float));
|
||||
} else {
|
||||
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_,
|
||||
arithmeticParameter_->ndim_);
|
||||
}
|
||||
if (dx2_size == dy_size) {
|
||||
memcpy(dx2, dy, dy_size * sizeof(float));
|
||||
memcpy(dx2, dy, static_cast<size_t>(dy_size) * sizeof(float));
|
||||
} else {
|
||||
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx2, arithmeticParameter_->in_shape1_,
|
||||
arithmeticParameter_->ndim_);
|
||||
|
@ -102,7 +102,7 @@ int ArithmeticGradCPUKernel::ArithmeticGradAdd(float *dy, int dy_size, float *dx
|
|||
int ArithmeticGradCPUKernel::ArithmeticGradSub(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2,
|
||||
int dx2_size) {
|
||||
if (dx1_size == dy_size) {
|
||||
memcpy(dx1, dy, dy_size * sizeof(float));
|
||||
memcpy(dx1, dy, static_cast<size_t>(dy_size) * sizeof(float));
|
||||
} else {
|
||||
ReduceSumByAxes(dy, arithmeticParameter_->out_shape_, dx1, arithmeticParameter_->in_shape0_,
|
||||
arithmeticParameter_->ndim_);
|
||||
|
@ -263,7 +263,6 @@ int ArithmeticGradCPUKernel::Execute(int task_id) {
|
|||
}
|
||||
|
||||
int ArithmeticGradRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
||||
CHECK_NULL_RETURN(cdata);
|
||||
auto Arithmetic_kernel = reinterpret_cast<ArithmeticGradCPUKernel *>(cdata);
|
||||
CHECK_NULL_RETURN(Arithmetic_kernel);
|
||||
auto error_code = Arithmetic_kernel->Execute(task_id);
|
||||
|
|
|
@ -39,8 +39,12 @@ class ArithmeticGradCPUKernel : public InnerKernel {
|
|||
public:
|
||||
explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
||||
: InnerKernel(parameter, inputs, outputs, ctx), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
|
||||
switch (type()) {
|
||||
: InnerKernel(parameter, inputs, outputs, ctx),
|
||||
arithmetic_grad_(nullptr),
|
||||
tile_data0(nullptr),
|
||||
tile_data1(nullptr),
|
||||
tile_data2(nullptr) {
|
||||
switch (parameter->type_) {
|
||||
case PrimitiveType_MulGrad:
|
||||
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape
|
||||
break;
|
||||
|
|
|
@ -29,8 +29,8 @@ class ArithmeticSelfGradCPUKernel : public InnerKernel {
|
|||
public:
|
||||
ArithmeticSelfGradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
||||
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {}
|
||||
~ArithmeticSelfGradCPUKernel() override {}
|
||||
: InnerKernel(parameter, inputs, outputs, ctx), thread_count_(ctx->thread_num_), self_grad_operation_(nullptr) {}
|
||||
~ArithmeticSelfGradCPUKernel() override = default;
|
||||
int Prepare() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
|
|
|
@ -36,7 +36,7 @@ int AssignCPUKernel::Execute(int task_id) {
|
|||
CHECK_NULL_RETURN(y);
|
||||
int length = in_tensors_.at(0)->ElementsNum();
|
||||
int stride = UP_DIV(length, thread_count_);
|
||||
int count = MSMIN(stride, length - stride * task_id);
|
||||
size_t count = MSMIN(stride, length - stride * task_id);
|
||||
|
||||
int start = stride * task_id;
|
||||
|
||||
|
|
|
@ -62,7 +62,7 @@ int BiasGradCPUKernel::Execute(int task_id) {
|
|||
size_t nhw_size = 1;
|
||||
size_t channels = bias_param->in_shape0_[bias_param->ndim_ - 1]; // C in NHWC
|
||||
for (unsigned int i = 0; i < bias_param->ndim_ - 1; i++) {
|
||||
nhw_size *= bias_param->in_shape0_[i];
|
||||
nhw_size *= static_cast<size_t>(bias_param->in_shape0_[i]);
|
||||
}
|
||||
|
||||
size_t total_size = channels * nhw_size;
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32_grad/bn_grad.h"
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
@ -121,6 +120,9 @@ int BNGradCPUKernel::Execute(int task_id) {
|
|||
case 2:
|
||||
std::fill(dscale, dscale + channels, 0.f);
|
||||
break;
|
||||
default:
|
||||
MS_LOG(ERROR) << "Exceeds the maximum thread";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (thread_num == 1) {
|
||||
|
@ -137,6 +139,9 @@ int BNGradCPUKernel::Execute(int task_id) {
|
|||
scale, count, total, channels, dx + task_id * stride * channels);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupported stage";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
|
|
|
@ -54,22 +54,18 @@ int ConvolutionTrainCPUKernel::ReSize() {
|
|||
const int n = conv_param_->output_channel_ * conv_param_->group_;
|
||||
const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
|
||||
|
||||
do_img2col_ = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
|
||||
(conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
|
||||
(conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) &&
|
||||
(conv_param_->stride_h_ == 1) && (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1)
|
||||
? false
|
||||
: true;
|
||||
do_img2col_ = !((conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
|
||||
(conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
|
||||
(conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) && (conv_param_->stride_h_ == 1) &&
|
||||
(conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1));
|
||||
do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
|
||||
(conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
|
||||
(conv_param_->dilation_w_ == 1)
|
||||
? true
|
||||
: false;
|
||||
(conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
|
||||
(conv_param_->dilation_w_ == 1);
|
||||
|
||||
ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
|
||||
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
|
||||
int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
|
||||
set_workspace_size((ws_size_ + mat_alloc) * sizeof(float));
|
||||
set_workspace_size(static_cast<size_t>(ws_size_ + mat_alloc) * sizeof(float));
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -139,7 +135,7 @@ int ConvolutionTrainCPUKernel::Execute(int task_id) {
|
|||
}
|
||||
} else {
|
||||
mat_b = w_addr;
|
||||
const size_t in_plane_size = in_ch * in_h * in_w;
|
||||
const int in_plane_size = in_ch * in_h * in_w;
|
||||
for (int i = 0; i < batch; ++i) {
|
||||
im = x_addr + i * in_plane_size;
|
||||
for (int ci = 0; ci < m; ci += chunk_) {
|
||||
|
|
|
@ -61,26 +61,21 @@ int ConvolutionGradFilterCPUKernel::ReSize() {
|
|||
conv_param->output_w_ = dy_tensor->shape()[kNHWC_W];
|
||||
|
||||
NNACL_CHECK_ZERO_RETURN_ERR(conv_param->group_);
|
||||
do_img2col_ = (conv_param->kernel_h_ == 1) && (conv_param->kernel_w_ == 1) && (conv_param->pad_d_ == 0) &&
|
||||
(conv_param->pad_u_ == 0) && (conv_param->pad_l_ == 0) && (conv_param->pad_r_ == 0) &&
|
||||
(conv_param->dilation_h_ == 1) && (conv_param->dilation_w_ == 1) && (conv_param->stride_h_ == 1) &&
|
||||
(conv_param->stride_w_ == 1) && (conv_param->group_ == 1)
|
||||
? false
|
||||
: true;
|
||||
do_img2col_ = !((conv_param->kernel_h_ == 1) && (conv_param->kernel_w_ == 1) && (conv_param->pad_d_ == 0) &&
|
||||
(conv_param->pad_u_ == 0) && (conv_param->pad_l_ == 0) && (conv_param->pad_r_ == 0) &&
|
||||
(conv_param->dilation_h_ == 1) && (conv_param->dilation_w_ == 1) && (conv_param->stride_h_ == 1) &&
|
||||
(conv_param->stride_w_ == 1) && (conv_param->group_ == 1));
|
||||
do_dw_ = (conv_param->output_channel_ == conv_param->group_) &&
|
||||
(conv_param->input_channel_ == conv_param->output_channel_) && (conv_param->dilation_h_ == 1) &&
|
||||
(conv_param->dilation_w_ == 1)
|
||||
? true
|
||||
: false;
|
||||
(conv_param->input_channel_ == conv_param->output_channel_) && (conv_param->dilation_h_ == 1) &&
|
||||
(conv_param->dilation_w_ == 1);
|
||||
|
||||
ws_size_ = chunk_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_;
|
||||
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param->group_;
|
||||
ws_size_ = do_dw_ ? ws_size_ : ws_size_ / static_cast<size_t>(conv_param->group_);
|
||||
int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_;
|
||||
int k = conv_param->output_channel_ / conv_param->group_;
|
||||
int thread_num = op_parameter_->thread_num_;
|
||||
auto thread_num = static_cast<size_t>(op_parameter_->thread_num_);
|
||||
mat_alloc_ = MatSizeTotal(k, n, chunk_, 0);
|
||||
set_workspace_size((ws_size_ + mat_alloc_ + (k * n)) * thread_num * sizeof(float));
|
||||
|
||||
set_workspace_size((ws_size_ + mat_alloc_ + static_cast<size_t>(k * n)) * thread_num * sizeof(float));
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -97,27 +92,24 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
|
|||
auto dy_addr = reinterpret_cast<float *>(input_dy->MutableData());
|
||||
auto dw_addr = reinterpret_cast<float *>(out_dw->MutableData());
|
||||
|
||||
int nweights = out_dw->ElementsNum();
|
||||
int in_ch = conv_param->input_channel_;
|
||||
int in_h = conv_param->input_h_;
|
||||
int in_w = conv_param->input_w_;
|
||||
int k_h = conv_param->kernel_h_;
|
||||
int k_w = conv_param->kernel_w_;
|
||||
int batch = conv_param->output_batch_;
|
||||
int out_ch = conv_param->output_channel_;
|
||||
int groups = conv_param->group_;
|
||||
int out_h = conv_param->output_h_;
|
||||
int out_w = conv_param->output_w_;
|
||||
|
||||
int m = out_h * out_w;
|
||||
int m = conv_param->output_h_ * conv_param->output_w_;
|
||||
int n = k_h * k_w * in_ch / groups;
|
||||
int k = out_ch / groups;
|
||||
int thread_num = op_parameter_->thread_num_;
|
||||
float *workspace_temp = reinterpret_cast<float *>(workspace());
|
||||
float *mat_workspace = workspace_temp + ws_size_ * thread_num + task_id * (mat_alloc_ + k * n);
|
||||
auto *workspace_temp = reinterpret_cast<float *>(workspace());
|
||||
float *mat_workspace =
|
||||
workspace_temp + static_cast<int>(ws_size_) * thread_num + task_id * (static_cast<int>(mat_alloc_) + k * n);
|
||||
float *mat_tmp = mat_workspace + mat_alloc_;
|
||||
int stride = UP_DIV(batch, thread_num);
|
||||
int count = MSMIN(stride, batch - stride * task_id);
|
||||
int stride = UP_DIV(conv_param->output_batch_, thread_num);
|
||||
int count = MSMIN(stride, conv_param->output_batch_ - stride * task_id);
|
||||
count = (count < 0) ? 0 : count;
|
||||
int start = stride * task_id;
|
||||
int end = start + count;
|
||||
|
@ -140,7 +132,7 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
|
|||
end = start + count;
|
||||
|
||||
const int kernel_spatial = k_h * k_w;
|
||||
for (int i = 0; i < batch; ++i) {
|
||||
for (int i = 0; i < conv_param->output_batch_; ++i) {
|
||||
for (int ci = 0; ci < m; ci += chunk_) {
|
||||
real_chunk = MSMIN(m - ci, chunk_);
|
||||
mat_b = workspace_temp + task_id * ws_size_;
|
||||
|
@ -148,7 +140,7 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
|
|||
RollingIm2ColPackDwUnitFp32(im, conv_param, mat_b, real_chunk, ci);
|
||||
for (int j = start; j < end; ++j) {
|
||||
mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
|
||||
mat_c = dw_addr + j * nweights / groups;
|
||||
mat_c = dw_addr + j * out_dw->ElementsNum() / groups;
|
||||
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b + (j * kernel_spatial), n * groups, 1, mat_c, n,
|
||||
mat_workspace);
|
||||
}
|
||||
|
@ -161,8 +153,8 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
|
|||
for (int j = 0; j < groups; ++j) {
|
||||
real_chunk = MSMIN(m - ci, chunk_);
|
||||
mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch;
|
||||
mat_b = workspace_temp + task_id * ws_size_;
|
||||
mat_c = dw_addr + j * nweights / groups;
|
||||
mat_b = workspace_temp + task_id * static_cast<int>(ws_size_);
|
||||
mat_c = dw_addr + j * out_dw->ElementsNum() / groups;
|
||||
im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups);
|
||||
RollingIm2ColPackUnitFp32(im, conv_param, mat_b, real_chunk, ci);
|
||||
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b, n, 0, mat_tmp, n, mat_workspace);
|
||||
|
@ -172,17 +164,16 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
NNACL_CHECK_ZERO_RETURN_ERR(out_w * conv_param->stride_h_);
|
||||
NNACL_CHECK_ZERO_RETURN_ERR(out_w * conv_param->stride_w_);
|
||||
NNACL_CHECK_ZERO_RETURN_ERR(conv_param->output_w_);
|
||||
mat_c = dw_addr;
|
||||
const size_t in_plane_size = in_ch * in_h * in_w;
|
||||
auto in_plane_size = in_ch * in_h * in_w;
|
||||
for (int i = start; i < end; ++i) {
|
||||
for (int ci = 0; ci < m; ci += chunk_) {
|
||||
real_chunk = MSMIN(m - ci, chunk_);
|
||||
mat_a = dy_addr + i * m * k + ci * out_ch;
|
||||
im = x_addr + i * in_plane_size;
|
||||
int input_h = ci / out_w * conv_param->stride_h_;
|
||||
int input_w = ci % out_w * conv_param->stride_w_;
|
||||
int input_h = ci / conv_param->output_w_ * conv_param->stride_h_;
|
||||
int input_w = ci % conv_param->output_w_ * conv_param->stride_w_;
|
||||
int offset = (input_h * in_w + input_w) * in_ch;
|
||||
GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, im + offset, n, 0, mat_tmp, n, mat_workspace);
|
||||
std::unique_lock<std::mutex> merge_lock(lock_);
|
||||
|
|
|
@ -54,7 +54,7 @@ int ConvolutionGradInputCPUKernel::ReSize() {
|
|||
|
||||
int n = conv_param->kernel_w_ * conv_param->kernel_h_ * conv_param->input_channel_ / conv_param->group_;
|
||||
int k = conv_param->output_channel_ / conv_param->group_;
|
||||
int thread_num = op_parameter_->thread_num_;
|
||||
auto thread_num = static_cast<size_t>(op_parameter_->thread_num_);
|
||||
mat_alloc_ = MatSizeTotal(chunk_, n, k, 0);
|
||||
set_workspace_size((ws_size_ + mat_alloc_) * sizeof(float) * thread_num);
|
||||
|
||||
|
@ -102,7 +102,8 @@ int ConvolutionGradInputCPUKernel::Execute(int task_id) {
|
|||
int m = out_h * out_w;
|
||||
int n = k_w * k_h * in_ch / groups;
|
||||
int k = out_ch / groups;
|
||||
float *workspace_temp = reinterpret_cast<float *>(workspace()) + task_id * (mat_alloc_ + ws_size_);
|
||||
float *workspace_temp =
|
||||
reinterpret_cast<float *>(workspace()) + static_cast<size_t>(task_id) * (mat_alloc_ + ws_size_);
|
||||
float *mat_workspace = workspace_temp + ws_size_;
|
||||
int stride = UP_DIV(batch, thread_num);
|
||||
int count = MSMIN(stride, batch - stride * task_id);
|
||||
|
@ -169,10 +170,10 @@ int ConvolutionGradInputRun(void *cdata, int task_id, float lhs_scale, float rhs
|
|||
|
||||
int ConvolutionGradInputCPUKernel::Run() {
|
||||
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
|
||||
int batch = conv_param->output_batch_;
|
||||
int in_ch = conv_param->input_channel_;
|
||||
int in_h = conv_param->input_h_;
|
||||
int in_w = conv_param->input_w_;
|
||||
auto batch = static_cast<size_t>(conv_param->output_batch_);
|
||||
auto in_ch = static_cast<size_t>(conv_param->input_channel_);
|
||||
auto in_h = static_cast<size_t>(conv_param->input_h_);
|
||||
auto in_w = static_cast<size_t>(conv_param->input_w_);
|
||||
auto *out_dx = out_tensors_.at(0);
|
||||
auto dx_addr = reinterpret_cast<float *>(out_dx->MutableData());
|
||||
memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w);
|
||||
|
|
|
@ -80,7 +80,7 @@ int DropoutCPUKernel::Execute(int task_id) {
|
|||
std::bernoulli_distribution distribution(param->ratio_);
|
||||
|
||||
for (int i = start; i < end; i++) {
|
||||
mask[i] = distribution(generator);
|
||||
mask[i] = static_cast<float>(distribution(generator));
|
||||
output_ptr[i] = input_ptr[i] * mask[i] * scale_;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class DropoutGradCPUKernel : public InnerKernel {
|
|||
int Execute(int task_id);
|
||||
|
||||
private:
|
||||
float scale_;
|
||||
float scale_ = 1.0f;
|
||||
int thread_count_ = 1;
|
||||
};
|
||||
|
||||
|
|
|
@ -73,13 +73,13 @@ int ResizeGradCPUKernel::Execute(int task_id) {
|
|||
CHECK_NULL_RETURN(param);
|
||||
auto batch_size = in_tensors_.at(0)->Batch();
|
||||
auto channel = in_tensors_.at(0)->Channel();
|
||||
int error_code = NNACL_OK;
|
||||
int error_code;
|
||||
if (param->method == static_cast<int>(schema::ResizeMethod_NEAREST)) {
|
||||
error_code = ResizeNearestNeighborGrad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
|
||||
} else {
|
||||
error_code = ResizeBiLinearGrad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
|
||||
}
|
||||
if (error_code != NNACL_OK) {
|
||||
if (error_code != static_cast<int>(NNACL_OK)) {
|
||||
MS_LOG(ERROR) << "Resize fp32 grad failed.";
|
||||
return error_code;
|
||||
}
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits.h"
|
||||
#include <cmath>
|
||||
#include "src/kernel_registry.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits_grad.h"
|
||||
#include <cmath>
|
||||
#include "src/kernel_registry.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ constexpr static int kOutputIdx = 0;
|
|||
|
||||
int SmoothL1LossCPUKernel::ReSize() { return RET_OK; }
|
||||
|
||||
int SmoothL1LossCPUKernel::Execute(int task_id) {
|
||||
int SmoothL1LossCPUKernel::Execute(size_t task_id) {
|
||||
SmoothL1LossParameter *smooth_l1_loss_param = reinterpret_cast<SmoothL1LossParameter *>(op_parameter_);
|
||||
CHECK_NULL_RETURN(smooth_l1_loss_param);
|
||||
auto predict = reinterpret_cast<float *>(in_tensors_.at(kPredictIdx)->MutableData());
|
||||
|
@ -42,7 +42,7 @@ int SmoothL1LossCPUKernel::Execute(int task_id) {
|
|||
const size_t length = in_tensors_.at(kPredictIdx)->ElementsNum();
|
||||
|
||||
size_t stride = UP_DIV(length, thread_count_);
|
||||
int count = MSMIN(stride, length - stride * task_id);
|
||||
size_t count = MSMIN(stride, length - stride * task_id);
|
||||
|
||||
size_t start = stride * task_id;
|
||||
size_t end = start + count;
|
||||
|
|
|
@ -33,11 +33,11 @@ class SmoothL1LossCPUKernel : public InnerKernel {
|
|||
int Prepare() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
int Execute(int task_id);
|
||||
int Execute(size_t task_id);
|
||||
|
||||
private:
|
||||
SmoothL1LossParameter *smooth_l1_param_;
|
||||
int thread_count_ = 1;
|
||||
size_t thread_count_ = 1;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
|
|||
float *output2) const {
|
||||
float eps = 1e-6;
|
||||
if (grads != nullptr) {
|
||||
for (int i = 0; i < param_->batch_size_; ++i) {
|
||||
for (size_t i = 0; i < static_cast<size_t>(param_->batch_size_); ++i) {
|
||||
float loss = 0.f;
|
||||
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
|
||||
float logit =
|
||||
|
@ -45,7 +45,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab
|
|||
output2[i] = loss;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < param_->batch_size_; ++i) {
|
||||
for (size_t i = 0; i < static_cast<size_t>(param_->batch_size_); ++i) {
|
||||
float loss = 0.f;
|
||||
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
|
||||
float logit =
|
||||
|
@ -123,7 +123,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() {
|
|||
}
|
||||
|
||||
size_t data_size = in_tensors_.at(0)->ElementsNum();
|
||||
set_workspace_size((data_size + dims.at(0)) * sizeof(float));
|
||||
set_workspace_size((data_size + static_cast<size_t>(dims.at(0))) * sizeof(float));
|
||||
sm_params_.n_dim_ = 2;
|
||||
sm_params_.element_size_ = data_size;
|
||||
sm_params_.axis_ = 1;
|
||||
|
|
|
@ -33,7 +33,7 @@ class SoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
|
|||
: LossKernel(parameter, inputs, outputs, ctx) {
|
||||
param_ = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
|
||||
}
|
||||
~SoftmaxCrossEntropyWithLogitsCPUKernel() override {}
|
||||
~SoftmaxCrossEntropyWithLogitsCPUKernel() override = default;
|
||||
|
||||
void ForwardPostExecute(const float *labels, const float *logits, float *output1, float *output2) const;
|
||||
|
||||
|
|
|
@ -49,9 +49,9 @@ int SoftmaxGradCPUKernel::Prepare() {
|
|||
|
||||
inner_size_ = 1;
|
||||
for (size_t i = axis + 1; i < in_dims; i++) {
|
||||
inner_size_ *= in_shape.at(i);
|
||||
inner_size_ *= static_cast<size_t>(in_shape.at(i));
|
||||
}
|
||||
set_workspace_size(inner_size_ * (1 + in_shape.at(axis)) * sizeof(float));
|
||||
set_workspace_size(inner_size_ * (1 + static_cast<size_t>(in_shape.at(axis))) * sizeof(float));
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ class SoftmaxGradCPUKernel : public InnerKernel {
|
|||
|
||||
private:
|
||||
SoftmaxParameter *param;
|
||||
size_t inner_size_;
|
||||
size_t inner_size_ = 0;
|
||||
};
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -34,7 +34,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *
|
|||
float *output) const {
|
||||
float total_loss = 0;
|
||||
MS_CHECK_GT(param->batch_size_, 0, RET_ERROR);
|
||||
for (int i = 0; i < param->batch_size_; ++i) {
|
||||
for (size_t i = 0; i < static_cast<size_t>(param->batch_size_); ++i) {
|
||||
if (labels[i] < 0) {
|
||||
MS_LOG(ERROR) << "label value must >= 0";
|
||||
return RET_ERROR;
|
||||
|
@ -91,7 +91,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
|
|||
float *losses = static_cast<float *>(workspace());
|
||||
CHECK_NULL_RETURN(losses);
|
||||
float *sum_data = losses + data_size;
|
||||
int length = sm_params_.input_shape_[sm_params_.axis_];
|
||||
int length = sm_params_->input_shape_[sm_params_->axis_];
|
||||
int stride = UP_DIV(outter_size_, threads_);
|
||||
int count = MSMIN(stride, outter_size_ - stride * task_id);
|
||||
if (count <= 0) return RET_OK;
|
||||
|
@ -108,6 +108,9 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
|
|||
} else {
|
||||
return ForwardPostExecute(labels, losses, out);
|
||||
}
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupported stage";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -125,9 +128,9 @@ int SparseSoftmaxCrossEntropyWithLogitsRun(void *cdata, int task_id, float lhs_s
|
|||
}
|
||||
|
||||
int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
|
||||
int axis = sm_params_.axis_;
|
||||
int n_dim = sm_params_.n_dim_;
|
||||
const int *input_shape = sm_params_.input_shape_;
|
||||
int axis = sm_params_->axis_;
|
||||
int n_dim = sm_params_->n_dim_;
|
||||
const int *input_shape = sm_params_->input_shape_;
|
||||
int inner_size = 1;
|
||||
int outter_size = 1;
|
||||
CHECK_NULL_RETURN(in_tensors_.at(0));
|
||||
|
@ -136,7 +139,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() {
|
|||
CHECK_NULL_RETURN(losses);
|
||||
float *sum_data = losses + data_size;
|
||||
std::fill(losses, losses + data_size, 0.f);
|
||||
std::fill(sum_data, sum_data + sm_params_.input_shape_[0], 0.f);
|
||||
std::fill(sum_data, sum_data + sm_params_->input_shape_[0], 0.f);
|
||||
for (int i = 0; i < axis; i++) {
|
||||
outter_size *= input_shape[i];
|
||||
}
|
||||
|
@ -182,12 +185,17 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Prepare() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
size_t data_size = in_tensors_.at(0)->ElementsNum();
|
||||
set_workspace_size((data_size + dims.at(0)) * sizeof(float));
|
||||
sm_params_.n_dim_ = 2;
|
||||
sm_params_.element_size_ = static_cast<int>(data_size);
|
||||
sm_params_.axis_ = 1;
|
||||
set_workspace_size((data_size + static_cast<size_t>(dims.at(0))) * sizeof(float));
|
||||
sm_params_ = new (std::nothrow) SoftmaxParameter();
|
||||
if (sm_params_ == nullptr) {
|
||||
MS_LOG(ERROR) << "new softmax param failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
sm_params_->n_dim_ = 2;
|
||||
sm_params_->element_size_ = static_cast<int>(data_size);
|
||||
sm_params_->axis_ = 1;
|
||||
for (size_t i = 0; i < dims.size(); i++) {
|
||||
sm_params_.input_shape_[i] = dims.at(i);
|
||||
sm_params_->input_shape_[i] = dims.at(i);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -34,7 +34,12 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
|
|||
: LossKernel(parameter, inputs, outputs, ctx) {
|
||||
param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(parameter);
|
||||
}
|
||||
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {}
|
||||
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {
|
||||
if (sm_params_ != nullptr) {
|
||||
delete sm_params_;
|
||||
sm_params_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
int ForwardPostExecute(const int *labels, const float *losses, float *output) const;
|
||||
int GradPostExecute(const int *labels, const float *losses, float *grads) const;
|
||||
|
@ -46,11 +51,11 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
|
|||
|
||||
private:
|
||||
SoftmaxCrossEntropyParameter *param;
|
||||
SoftmaxParameter sm_params_;
|
||||
SoftmaxParameter *sm_params_ = nullptr;
|
||||
int inner_size_ = 1;
|
||||
int outter_size_ = 1;
|
||||
int stage_;
|
||||
int threads_;
|
||||
int stage_ = 0;
|
||||
int threads_ = 0;
|
||||
};
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -46,13 +46,13 @@ int UnsortedSegmentSumCPUKernel::Prepare() {
|
|||
for (size_t i = 0; i < input_shape.size(); ++i) {
|
||||
unit_num_ *= input_shape[i];
|
||||
if (i >= segment_ids_shape.size()) {
|
||||
input_dim1_ *= input_shape[i];
|
||||
input_dim1_ *= static_cast<size_t>(input_shape[i]);
|
||||
}
|
||||
}
|
||||
output_dim0_ = output_shape[0];
|
||||
output_dim1_ = 1;
|
||||
for (size_t j = 1; j < output_shape.size(); j++) {
|
||||
output_dim1_ *= output_shape[j];
|
||||
output_dim1_ *= static_cast<size_t>(output_shape[j]);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ class LossKernel : public InnerKernel {
|
|||
LossKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
||||
: InnerKernel(parameter, inputs, outputs, ctx) {}
|
||||
~LossKernel() = default;
|
||||
~LossKernel() override = default;
|
||||
};
|
||||
|
||||
} // namespace mindspore::kernel
|
||||
|
|
Loading…
Reference in New Issue