fix tsan (ThreadSanitizer) data race bugs

jianghui58 2022-07-26 12:32:37 +08:00
parent 1b8a5ae512
commit 742291e747
4 changed files with 17 additions and 11 deletions
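As far as the diff shows, the races being fixed are concurrent writes to the shared `stat` tensor from the ParallelLaunch workers in the SGD kernel and unsynchronized access to the plain `bool valid_grad_sum_`. The fix caches the flag in an atomic member (`sgd_stat_`), lets the workers update only that member, writes it back to the tensor once on the calling thread, and turns `valid_grad_sum_` into a `std::atomic_bool`. A minimal sketch of that pattern, using plain std::thread instead of ParallelLaunch and hypothetical names (`Kernel`, `run`, `worker`), not the MindSpore Lite sources:

// Minimal sketch of the pattern applied in this commit (hypothetical names, not the real API).
#include <atomic>
#include <thread>
#include <vector>

struct Kernel {
  float *stat = nullptr;                 // points into shared tensor memory
  std::atomic<float> cached_stat{0.0f};  // per-kernel copy the workers may write

  // Workers touch only the atomic member, so their concurrent stores are not a race.
  void worker(int /*task_id*/) { cached_stat = 0.0f; }

  void run(int thread_count) {
    cached_stat = *stat;                 // single read of the tensor on the calling thread
    std::vector<std::thread> pool;
    for (int i = 0; i < thread_count; ++i) {
      pool.emplace_back([this, i] { worker(i); });
    }
    for (auto &t : pool) t.join();
    *stat = cached_stat;                 // single write-back after all workers finished
  }
};

int main() {
  float tensor_stat = 1.0f;
  Kernel kernel;
  kernel.stat = &tensor_stat;
  kernel.run(4);
  return 0;
}

In the diff, `sgd_stat_` plays the role of `cached_stat`: the ExecuteInit workers assign it, and Run() copies it back into the `stat` tensor after ParallelLaunch returns.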

View File

@@ -30,9 +30,9 @@ namespace mindspore::kernel {
int AssignCPUKernel::ReSize() { return RET_OK; }
int AssignCPUKernel::DoExecute(int task_id) {
- auto x = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
+ auto x = reinterpret_cast<float *>(in_tensors_.at(0)->data());
CHECK_NULL_RETURN(x);
- auto y = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
+ auto y = reinterpret_cast<float *>(in_tensors_.at(1)->data());
CHECK_NULL_RETURN(y);
int length = in_tensors_.at(0)->ElementsNum();
int stride = UP_DIV(length, thread_count_);

View File

@@ -57,8 +57,8 @@ int DoSgd(float *weight, float *accumulate, float *gradient, float learning_rate
return RET_OK;
}
- int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, float learning_rate, float moment,
- bool nesterov, float weight_decay, int start, int end) {
+ int DoSgdInit(float *weight, float *accumulate, float *gradient, float learning_rate, float moment, bool nesterov,
+ float weight_decay, int start, int end) {
std::copy(&(gradient[start]), &(gradient[end]), &(accumulate[start]));
if (weight_decay > 0.f) {
for (int i = start; i < end; ++i) {
@@ -75,7 +75,6 @@ int DoSgdInit(float *weight, float *accumulate, float *gradient, float *stat, fl
weight[i] -= accumulate[i] * learning_rate;
}
}
- *stat = 0.0f;
} else {
for (int i = start; i < end; ++i) {
weight[i] -= accumulate[i] * learning_rate;
@@ -129,8 +128,9 @@ int SgdCPUKernel::ExecuteInit(int task_id) {
int end = start + count;
if (count > 0) {
- DoSgdInit(weight, accumulate, gradient, stat, learning_rate, moment, sgd_param_->use_nesterov_,
- sgd_param_->weight_decay_, start, end);
+ (void)DoSgdInit(weight, accumulate, gradient, learning_rate, moment, sgd_param_->use_nesterov_,
+ sgd_param_->weight_decay_, start, end);
+ sgd_stat_ = 0.0f;
}
return RET_OK;
}
@@ -172,9 +172,11 @@ int SgdRunInit(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
int SgdCPUKernel::Run() {
auto stat = reinterpret_cast<float *>(in_tensors_.at(5)->MutableData());
CHECK_NULL_RETURN(stat);
+ sgd_stat_ = *stat;
auto error_code = RET_OK;
- if (*stat > 0.0f) {
+ if (sgd_stat_ > 0.0f) {
error_code = ParallelLaunch(this->ms_context_, SgdRunInit, this, thread_count_);
+ *stat = sgd_stat_;
} else {
error_code = ParallelLaunch(this->ms_context_, SgdRun, this, thread_count_);
}
@@ -235,8 +237,9 @@ int SgdCPUKernel::OptimizerStep() {
DoSgd(weight, accumulate, grad_sum_, learning_rate, sgd_param_->dampening_, moment, sgd_param_->use_nesterov_,
sgd_param_->weight_decay_, start, end);
} else {
- DoSgdInit(weight, accumulate, grad_sum_, stat, learning_rate, moment, sgd_param_->use_nesterov_,
- sgd_param_->weight_decay_, start, end);
+ (void)DoSgdInit(weight, accumulate, grad_sum_, learning_rate, moment, sgd_param_->use_nesterov_,
+ sgd_param_->weight_decay_, start, end);
+ *stat = 0.0f;
}
std::fill(grad_sum_, grad_sum_ + length, 0);
OptimizerKernel::OptimizerStep();

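Hedged aside on the DoSgdInit signature change above: dropping the `float *stat` output parameter means the parallel workers no longer each execute `*stat = 0.0f;` on the same address. Concurrent unsynchronized writes are a data race under ThreadSanitizer even when every writer stores the same value; the caller now clears the flag once instead. A simplified, hypothetical reproduction of the old pattern (not the MindSpore code):

// Every worker clears the same shared flag: a write-write data race under TSan,
// even though all writers store the same value.
#include <thread>
#include <vector>

int main() {
  float stat = 1.0f;
  std::vector<std::thread> workers;
  for (int i = 0; i < 4; ++i) {
    workers.emplace_back([&stat] { stat = 0.0f; });  // racy plain stores
  }
  for (auto &t : workers) t.join();
  // Fixed variant: remove the store from the workers and clear the flag here,
  // once, after the join, which is what the new DoSgdInit/caller split does.
  stat = 0.0f;
  return 0;
}

Compiled with -fsanitize=thread, the store inside the lambda is reported as a data race; the single post-join store is not.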
View File

@@ -18,6 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_FP32_GRAD_SGD_H_
#include <vector>
+ #include <atomic>
#include "src/train/optimizer_kernel.h"
#include "nnacl/fp32_grad/optimizer.h"
@@ -46,6 +47,7 @@ class SgdCPUKernel : public OptimizerKernel {
private:
int thread_count_;
SgdParameter *sgd_param_;
+ std::atomic<float> sgd_stat_{0.0f};
};
} // namespace mindspore::kernel

View File

@@ -20,6 +20,7 @@
#include <cfloat>
#include <algorithm>
#include <string>
+ #include <atomic>
#include <iostream>
#include "src/runtime/kernel_exec.h"
#include "include/errorcode.h"
@@ -217,7 +218,7 @@ class OptimizerKernel : public LiteKernel {
int lr_idx_ = 0;
int grad_idx_ = 0;
float *grad_sum_ = nullptr;
- bool valid_grad_sum_ = false;
+ std::atomic_bool valid_grad_sum_ = false;
private:
WeightUpdateMode weight_update_mod_ = WeightUpdateMode::NORMAL;
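The `valid_grad_sum_` change follows the same idea: a plain bool that one thread sets while another reads is a data race, and `std::atomic_bool` makes both the store and the load well-defined without a mutex. A small self-contained sketch with hypothetical names, not taken from OptimizerKernel:

// A plain bool here would race; std::atomic_bool does not.
#include <atomic>
#include <thread>

int main() {
  std::atomic_bool valid_grad_sum{false};
  std::thread producer([&] { valid_grad_sum = true; });  // atomic store
  while (!valid_grad_sum) {
    // atomic load; spins until the producer has published the flag
  }
  producer.join();
  return 0;
}

Since the flag only needs individual loads and stores to be race-free, an atomic is sufficient here and no additional locking is required.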