fix tsan (ThreadSanitizer) data race bugs

jianghui58 2022-07-26 12:32:37 +08:00
parent 1b8a5ae512
commit 742291e747
4 changed files with 17 additions and 11 deletions
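As far as the diff shows, the races being fixed are concurrent writes to the shared `stat` tensor from the ParallelLaunch workers in the SGD kernel and unsynchronized access to the plain `bool valid_grad_sum_`. The fix caches the flag in an atomic member (`sgd_stat_`), lets the workers update only that member, writes it back to the tensor once on the calling thread, and turns `valid_grad_sum_` into a `std::atomic_bool`. A minimal sketch of that pattern, using plain std::thread instead of ParallelLaunch and hypothetical names (`Kernel`, `run`, `worker`), not the MindSpore Lite sources:

// Minimal sketch of the pattern applied in this commit (hypothetical names, not the real API).
#include <atomic>
#include <thread>
#include <vector>

struct Kernel {
  float *stat = nullptr;                 // points into shared tensor memory
  std::atomic<float> cached_stat{0.0f};  // per-kernel copy the workers may write

  // Workers touch only the atomic member, so their concurrent stores are not a race.
  void worker(int /*task_id*/) { cached_stat = 0.0f; }

  void run(int thread_count) {
    cached_stat = *stat;                 // single read of the tensor on the calling thread
    std::vector<std::thread> pool;
    for (int i = 0; i < thread_count; ++i) {
      pool.emplace_back([this, i] { worker(i); });
    }
    for (auto &t : pool) t.join();
    *stat = cached_stat;                 // single write-back after all workers finished
  }
};

int main() {
  float tensor_stat = 1.0f;
  Kernel kernel;
  kernel.stat = &tensor_stat;
  kernel.run(4);
  return 0;
}

In the diff, `sgd_stat_` plays the role of `cached_stat`: the ExecuteInit workers assign it, and Run() copies it back into the `stat` tensor after ParallelLaunch returns.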

View File

@@ -30,9 +30,9 @@ namespace mindspore::kernel {
int AssignCPUKernel::ReSize() { return RET_OK; }
int AssignCPUKernel::DoExecute(int task_id) {
- auto x = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
+ auto x = reinterpret_cast<float *>(in_tensors_.at(0)->data());
CHECK_NULL_RETURN(x);
- auto y = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
+ auto y = reinterpret_cast<float *>(in_tensors_.at(1)->data());
CHECK_NULL_RETURN(y);
int length = in_tensors_.at(0)->ElementsNum();
int stride = UP_DIV(length, thread_count_);

View File

@@ -57,8 +57,8 @@ int DoSgd(float *weight, float *accumulate, float *gradient, float learning_rate
return RET_OK;
}
- int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, float learning_rate, float moment,
- bool nesterov, float weight_decay, int start, int end) {
+ int DoSgdInit(float *weight, float *accumulate, float *gradient, float learning_rate, float moment, bool nesterov,
+ float weight_decay, int start, int end) {
std::copy(&(gradient[start]), &(gradient[end]), &(accumulate[start]));
if (weight_decay > 0.f) {
for (int i = start; i < end; ++i) {
@@ -75,7 +75,6 @@ int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, fl
weight[i] -= accumulate[i] * learning_rate;
}
}
- *stat = 0.0f;
} else {
for (int i = start; i < end; ++i) {
weight[i] -= accumulate[i] * learning_rate;
@@ -129,8 +128,9 @@ int SgdCPUKernel::ExecuteInit(int task_id) {
int end = start + count;
if (count > 0) {
- DoSgdInit(weight, accumulate, gradient, stat, learning_rate, moment, sgd_param_->use_nesterov_,
- sgd_param_->weight_decay_, start, end);
+ (void)DoSgdInit(weight, accumulate, gradient, learning_rate, moment, sgd_param_->use_nesterov_,
+ sgd_param_->weight_decay_, start, end);
+ sgd_stat_ = 0.0f;
}
return RET_OK;
}
@@ -172,9 +172,11 @@ int SgdRunInit(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
int SgdCPUKernel::Run() {
auto stat = reinterpret_cast<float *>(in_tensors_.at(5)->MutableData());
CHECK_NULL_RETURN(stat);
+ sgd_stat_ = *stat;
auto error_code = RET_OK;
- if (*stat > 0.0f) {
+ if (sgd_stat_ > 0.0f) {
error_code = ParallelLaunch(this->ms_context_, SgdRunInit, this, thread_count_);
+ *stat = sgd_stat_;
} else {
error_code = ParallelLaunch(this->ms_context_, SgdRun, this, thread_count_);
}
@@ -235,8 +237,9 @@ int SgdCPUKernel::OptimizerStep() {
DoSgd(weight, accumulate, grad_sum_, learning_rate, sgd_param_->dampening_, moment, sgd_param_->use_nesterov_,
sgd_param_->weight_decay_, start, end);
} else {
- DoSgdInit(weight, accumulate, grad_sum_, stat, learning_rate, moment, sgd_param_->use_nesterov_,
- sgd_param_->weight_decay_, start, end);
+ (void)DoSgdInit(weight, accumulate, grad_sum_, learning_rate, moment, sgd_param_->use_nesterov_,
+ sgd_param_->weight_decay_, start, end);
+ *stat = 0.0f;
}
std::fill(grad_sum_, grad_sum_ + length, 0);
OptimizerKernel::OptimizerStep();

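Hedged aside on the DoSgdInit signature change above: dropping the `float *stat` output parameter means the parallel workers no longer each execute `*stat = 0.0f;` on the same address. Concurrent unsynchronized writes are a data race under ThreadSanitizer even when every writer stores the same value; the caller now clears the flag once instead. A simplified, hypothetical reproduction of the old pattern (not the MindSpore code):

// Every worker clears the same shared flag: a write-write data race under TSan,
// even though all writers store the same value.
#include <thread>
#include <vector>

int main() {
  float stat = 1.0f;
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&stat] { stat = 0.0f; });  // racy plain stores
  }
  for (auto &t : workers) t.join();
  // Fixed variant: remove the store from the workers and clear the flag here,
  // once, after the join, which is what the new DoSgdInit/caller split does.
  stat = 0.0f;
  return 0;
}

Compiled with -fsanitize=thread, the store inside the lambda is reported as a data race; the single post-join store is not.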
View File

@@ -18,6 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_GRAD_SGD_H_
#include <vector>
+ #include <atomic>
#include "src/train/optimizer_kernel.h"
#include "nnacl/fp32_grad/optimizer.h"
@@ -46,6 +47,7 @@ class SgdCPUKernel : public OptimizerKernel {
private:
int thread_count_;
SgdParameter *sgd_param_;
+ std::atomic<float> sgd_stat_{0.0f};
};
} // namespace mindspore::kernel

View File

@@ -20,6 +20,7 @@
#include <cfloat>
#include <algorithm>
#include <string>
+ #include <atomic>
#include <iostream>
#include "src/runtime/kernel_exec.h"
#include "include/errorcode.h"
@@ -217,7 +218,7 @@ class OptimizerKernel : public LiteKernel {
int lr_idx_ = 0;
int grad_idx_ = 0;
float *grad_sum_ = nullptr;
- bool valid_grad_sum_ = false;
+ std::atomic_bool valid_grad_sum_ = false;
private:
WeightUpdateMode weight_update_mod_ = WeightUpdateMode::NORMAL;
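The `valid_grad_sum_` change follows the same idea: a plain bool that one thread sets while another reads is a data race, and `std::atomic_bool` makes both the store and the load well-defined without a mutex. A small self-contained sketch with hypothetical names, not taken from OptimizerKernel:

// A plain bool here would race; std::atomic_bool does not.
#include <atomic>
#include <thread>

int main() {
  std::atomic_bool valid_grad_sum{false};
  std::thread producer([&] { valid_grad_sum = true; });  // atomic store
  while (!valid_grad_sum) {
    // atomic load; spins until the producer has published the flag
  }
  producer.join();
  return 0;
}

Since the flag only needs individual loads and stores to be race-free, an atomic is sufficient here and no additional locking is required.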