!18311 optimize parallel function call

Merge pull request !18311 from yangjie159/mindrt_thread
2021-06-16 09:23:29 +08:00 · 2021-06-16 09:23:29 +08:00 · fe37f625ca
parent 63428d5e49 8cf8fa4ea7
commit fe37f625ca
183 changed files with 222 additions and 409 deletions
--- a/mindspore/lite/src/inner_context.cc
+++ b/mindspore/lite/src/inner_context.cc
@ -305,4 +305,15 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {

 // Support CPU backend to judge whether it supports Float16.
 bool InnerContext::IsSupportFloat16() const { return fp16_flag_; }
+
+ActorThreadPool *InnerContext::thread_pool() const { return thread_pool_; }
+
+int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num) {
+  ActorThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool();
+  if (pool == nullptr) {
+    MS_LOG(ERROR) << "thread pool is nullptr";
+    return RET_NULL_PTR;
+  }
+  return pool->ParallelLaunch(func, content, task_num);
+}
 }  // namespace mindspore::lite
--- a/mindspore/lite/src/inner_context.h
+++ b/mindspore/lite/src/inner_context.h
@ -30,9 +30,6 @@

 namespace mindspore::lite {
 struct InnerContext : public Context {
- public:
-  ActorThreadPool *thread_pool_{nullptr};
-
 public:
  InnerContext() = default;

@ -64,6 +61,8 @@ struct InnerContext : public Context {

  int IsValid() const;

+  ActorThreadPool *thread_pool() const;
+
  virtual ~InnerContext();

 private:
@ -83,6 +82,8 @@ struct InnerContext : public Context {

  bool fp16_flag_ = false;

+  ActorThreadPool *thread_pool_{nullptr};
+
 #ifdef ENABLE_ARM
 #ifndef MS_COMPILE_IOS
  CpuInfo *cpu_info_ = nullptr;
@ -95,6 +96,9 @@ struct InnerContext : public Context {
 #endif
 #endif
 };
+
+int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num);
+
 }  // namespace mindspore::lite

 #endif  // MINDSPORE_LITE_SRC_INNER_CONTEXT_H
--- a/mindspore/lite/src/lite_mindrt.cc
+++ b/mindspore/lite/src/lite_mindrt.cc
@ -323,7 +323,7 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
                                                        const lite::InnerContext *ctx) {
  std::vector<std::shared_ptr<LiteOpActor>> actors;
  std::unordered_map<size_t, AID> partial_map{};
-  auto thread_pool = ctx->thread_pool_;
+  auto thread_pool = ctx->thread_pool();
  if (thread_pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return actors;
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@ -881,7 +881,7 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
 }

 int LiteSession::InitGPURuntime() {
-  ActorThreadPool *thread_pool = this->context_->thread_pool_;
+  ActorThreadPool *thread_pool = this->context_->thread_pool();
  if (thread_pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    is_running_.store(false);
--- a/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/constant_of_shape.cc
@ -76,8 +76,7 @@ int ConstantOfShapeCPUKernel::Run() {
  }
  thread_stride_ = UP_DIV(param_->element_size_, thread_count);

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ConstantOfShapeRun, this, thread_count);
+  auto ret = ParallelLaunch(this->context_, ConstantOfShapeRun, this, thread_count);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/base/detection_post_process_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/detection_post_process_base.cc
@ -236,8 +236,7 @@ int DetectionPostProcessBaseCPUKernel::Run() {
      return status;
    }
  } else {
-    status = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
+    status = ParallelLaunch(this->context_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
      FreeAllocatedBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc
@ -165,8 +165,7 @@ int RunPriorBox(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int PriorBoxCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(RunPriorBox, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, RunPriorBox, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
@ -175,8 +175,7 @@ int QuantDTypeCastCPUKernel::Run() {
    uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c());
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(QuantDTypeCastRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(this->context_, QuantDTypeCastRun, this, thread_n_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
    if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&
--- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
@ -70,8 +70,7 @@ int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 int ReshapeBaseCPUKernel::Run() {
  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ReshapeRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ReshapeRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
@ -82,8 +82,7 @@ int SliceCPUKernel::Run() {
                      lite::DataTypeSize(in_tensors_.at(0)->data_type()));
    return RET_OK;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(SliceLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, SliceLaunch, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
@ -125,8 +125,7 @@ int SplitBaseCPUKernel::Run() {
    output_ptr_.at(i) = output_tensor->data_c();
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(SplitRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(this->context_, SplitRun, this, thread_n_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "split error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/base/split_with_over_lap_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/split_with_over_lap_base.cc
@ -117,8 +117,7 @@ int SplitWithOverlapBaseCPUKernel::Run() {
    inner_stride_ *= input_shape[i];
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(SplitWithOverlapRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, SplitWithOverlapRun, this, context_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ParallelLaunch for SplitWIthOverlapRun run fail. errorcode:[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
@ -100,8 +100,7 @@ int StackBaseCPUKernel::Run() {
  }
  // run stack
  num_threads_ = MSMIN(UP_DIV(outer_size_, 64), op_parameter_->thread_num_);
-  auto ret =
-    static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
+  auto ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
@ -162,8 +162,7 @@ int StridedSliceCPUKernel::FastRun() {
  }
  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(StrideRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, StrideRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/base/tile_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/tile_base.cc
@ -128,8 +128,7 @@ int TileCPUKernel::SimpleTileImpl(int task_id) {
 }

 int TileCPUKernel::RunSimpleTile() {
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(SimpleTile, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, SimpleTile, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
@ -103,8 +103,7 @@ int ActivationFp16CPUKernel::Run() {
  fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
  fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ActivationFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, ActivationFp16Run, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/addn_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/addn_fp16.cc
@ -88,8 +88,7 @@ int AddNFp16CPUKernel::Run() {
  in1_addr_ = input0_data;
  in2_addr_ = input1_data;
  out_addr_ = out_data;
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
    return RET_ERROR;
@ -97,8 +96,7 @@ int AddNFp16CPUKernel::Run() {
  for (size_t i = 2; i < in_tensors_.size(); ++i) {
    in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData());
    in2_addr_ = out_data;
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
      return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc
@ -168,8 +168,7 @@ int ArithmeticCompareFP16CPUKernel::Run() {
    FreeTmpBuffer();
    return RET_ERROR;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ArithmeticsRunFp16, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ArithmeticsRunFp16, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
@ -182,8 +182,7 @@ int ArithmeticFP16CPUKernel::Run() {
    FreeFp16Buffer();
    return RET_ERROR;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
  if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
    Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
                     output_tensor->ElementsNum());
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc
@ -84,8 +84,7 @@ int ArithmeticSelfFp16CPUKernel::Run() {
  }
  output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
@ -63,8 +63,7 @@ int BatchnormFp16CPUKernel::Run() {
    return RET_ERROR;
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
@ -131,8 +131,7 @@ int CastFp16CPUKernel::Run() {
  if (data_num_ == 0) {
    return RET_OK;
  }
-  return static_cast<const lite::InnerContext *>(this->context_)
-    ->thread_pool_->ParallelLaunch(CastFp16Run, this, op_parameter_->thread_num_);
+  return ParallelLaunch(this->context_, CastFp16Run, this, op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>)
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
@ -261,16 +261,14 @@ int Convolution1x1FP16CPUKernel::Run() {

    int ret = RET_ERROR;
    if (multi_thread_by_hw_) {
-      ret = static_cast<const lite::InnerContext *>(this->context_)
-              ->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunHw, this, thread_count_);
+      ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunHw, this, thread_count_);
    } else {
 #ifdef ENABLE_ARM64
      RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #else
      RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif
-      ret = static_cast<const lite::InnerContext *>(this->context_)
-              ->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunOc, this, thread_count_);
+      ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunOc, this, thread_count_);
    }
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "ParallelLaunch failed.";
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@ -117,8 +117,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
    }
    is_repack_ = false;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ConvDwFp16Run, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@ -169,8 +169,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
    }
    is_repack_ = false;
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvDwSWFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
@ -160,8 +160,7 @@ int ConvolutionFP16CPUKernel::Run() {
    }
    is_repack_ = false;
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvolutionFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(this->context_, ConvolutionFp16Impl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
@ -237,8 +237,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
    }
    is_repack_ = false;
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvolutionWinogradFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(this->context_, ConvolutionWinogradFp16Impl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
@ -52,8 +52,7 @@ int CropFp16CPUKernel::Run() {
  input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
  output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(CropFp16Run, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(this->context_, CropFp16Run, this, crop_para_->thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
@ -179,8 +179,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
    memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t));
    packed_output_ = output_ptr;
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_, DeconvDwFp16Run, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
@ -222,8 +222,7 @@ int DeConvolutionFp16CPUKernel::Run() {

    RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_);

-    error_code = static_cast<const lite::InnerContext *>(this->context_)
-                   ->thread_pool_->ParallelLaunch(DeConvFp16Run, this, thread_count_);
+    error_code = ParallelLaunch(this->context_, DeConvFp16Run, this, thread_count_);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
@ -399,15 +399,13 @@ int DeConvWinogradFp16CPUKernel::Run() {
    nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;

    ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t));
-    auto ret = static_cast<const lite::InnerContext *>(this->context_)
-                 ->thread_pool_->ParallelLaunch(DeConvWgFp16Run, this, deconv_param_->thread_num_);
+    auto ret = ParallelLaunch(this->context_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "DeConvWgFp16Run failed!";
      return ret;
    }
    // post bias activate and nhwc
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(DeConvWgPostFp16Run, this, thread_num_hw_);
+    ret = ParallelLaunch(this->context_, DeConvWgPostFp16Run, this, thread_num_hw_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "DeConvWgPostFp16Run failed!";
      return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc
@ -147,8 +147,7 @@ int GatherFp16CPUKernel::Run() {
      Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
    }
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(GatherRunFp16, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, GatherRunFp16, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc
@ -108,8 +108,7 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
 int InstanceNormFp16CPUKernel::Run() {
  src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
  dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(InstanceNormFp16Run, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/layer_norm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/layer_norm_fp16.cc
@ -95,8 +95,7 @@ int LayerNormFp16CPUKernel::Run() {
    var_data_ =
      reinterpret_cast<float16_t *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float16_t)));
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(LayerNormFp16Run, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, LayerNormFp16Run, this, op_parameter_->thread_num_);
  if (out_tensors_.size() != 3) {
    context_->allocator->Free(mean_data_);
    context_->allocator->Free(var_data_);
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/log_softmax_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/log_softmax_fp16.cc
@ -95,8 +95,7 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float r

 int LogSoftmaxFp16CPUKernel::Run() {
  if (in_plane_size_ == 1) {
-    auto ret = static_cast<const lite::InnerContext *>(this->context_)
-                 ->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
+    auto ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc
@ -295,8 +295,7 @@ int MatmulBaseFP16CPUKernel::Run() {
      batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
      batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
    }
-    auto ret = static_cast<const lite::InnerContext *>(this->context_)
-                 ->thread_pool_->ParallelLaunch(MatmulBaseFP16Run, this, thread_count_);
+    auto ret = ParallelLaunch(this->context_, MatmulBaseFP16Run, this, thread_count_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
      return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
@ -101,8 +101,7 @@ int PadFp16CPUKernel::Run() {
        output_[i] = pad_param_->constant_value_;
      }
    }
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
    }
@ -114,8 +113,7 @@ int PadFp16CPUKernel::Run() {
      return ret;
    }

-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc
@ -89,8 +89,7 @@ int PoolingFp16CPUKernel::Run() {
  fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
  fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(PoolingFp16Impl, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, PoolingFp16Impl, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc
@ -86,8 +86,7 @@ int PowerFp16CPUKernel::Run() {
      return ret;
    }
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(PowerImplFp16, this, thread_count_);
+  auto ret = ParallelLaunch(this->context_, PowerImplFp16, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret;
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc
@ -163,8 +163,7 @@ int QuantDTypeCastFp16CPUKernel::Run() {
    return RET_ERROR;
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(QuantDTypeCastFP16Run, this, thread_n_num_);
+  auto ret = ParallelLaunch(this->context_, QuantDTypeCastFP16Run, this, thread_n_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
@ -91,8 +91,7 @@ int ReduceFp16CPUKernel::Run() {
    outer_size_ = outer_sizes_.at(i);
    inner_size_ = inner_sizes_.at(i);
    axis_size_ = axis_sizes_.at(i);
-    auto error_code = static_cast<const lite::InnerContext *>(this->context_)
-                        ->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
+    auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
    if (error_code != RET_OK) {
      FreeTmpBuffer();
      MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
@ -107,8 +106,7 @@ int ReduceFp16CPUKernel::Run() {
  outer_size_ = outer_sizes_.back();
  inner_size_ = inner_sizes_.back();
  axis_size_ = axis_sizes_.back();
-  auto error_code = static_cast<const lite::InnerContext *>(this->context_)
-                      ->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
+  auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
  if (error_code != RET_OK) {
    FreeTmpBuffer();
    MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
@ -117,8 +117,7 @@ int ScaleFp16CPUKernel::Run() {
    return ret;
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ScaleFp16Run, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, ScaleFp16Run, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
    FreeTmpBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.cc
@ -63,8 +63,7 @@ int SliceFp16CPUKernel::Run() {
    DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16));
    return RET_OK;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(SliceFp16Launch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, SliceFp16Launch, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret;
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
@ -95,8 +95,7 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_

 int SoftmaxFp16CPUKernel::Run() {
  if (in_plane_size_ == 1) {
-    auto ret = static_cast<const lite::InnerContext *>(this->context_)
-                 ->thread_pool_->ParallelLaunch(SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
+    auto ret = ParallelLaunch(this->context_, SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@ -101,8 +101,7 @@ int StackFp16CPUKernel::Run() {
  }
  // run stack
  num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->op_parameter_->thread_num_);
-  ret =
-    static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
+  ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/activation_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/activation_fp16_grad.cc
@ -94,8 +94,7 @@ int ActivationGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
 }

 int ActivationGradCPUKernelFp16::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ActivationGradRunFp16, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, ActivationGradRunFp16, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_grad.cc
@ -75,8 +75,7 @@ int ArithmeticGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
 }

 int ArithmeticGradCPUKernelFp16::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ArithmeticGradRunFp16, this, 1);
+  int error_code = ParallelLaunch(this->context_, ArithmeticGradRunFp16, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Arithmetic Grad function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_self_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/arithmetic_fp16_self_grad.cc
@ -72,8 +72,7 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id, float lhs_scale, float r
 }

 int ArithmeticSelfGradFp16CPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ArithmeticSelfGradFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, ArithmeticSelfGradFp16Run, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bias_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bias_fp16_grad.cc
@ -83,8 +83,7 @@ int BiasGradFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
 }

 int BiasGradCPUKernelFp16::Run() {
-  int error_code =
-    static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(BiasGradFp16Run, this, 1);
+  int error_code = ParallelLaunch(this->context_, BiasGradFp16Run, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
@ -141,8 +141,7 @@ int BNGradCPUKernelFp16::Run() {
  stage_ = 0;
  thread_num_ = context_->thread_num_;
  if (thread_num_ == 1) {
-    int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                       ->thread_pool_->ParallelLaunch(BNGradFp16Run, this, thread_num_);
+    int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, thread_num_);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
      return RET_ERROR;
@ -151,8 +150,7 @@ int BNGradCPUKernelFp16::Run() {
    const std::vector<int> threads = {thread_num_, 1, thread_num_};
    for (size_t stage = 0; stage < threads.size(); stage++) {
      stage_ = static_cast<int>(stage);
-      int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                         ->thread_pool_->ParallelLaunch(BNGradFp16Run, this, threads.at(stage));
+      int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, threads.at(stage));
      if (error_code != RET_OK) {
        MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
        return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_filter.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_filter.cc
@ -191,8 +191,7 @@ int ConvolutionGradFilterCPUKernelFp16::Run() {
  auto *out_dw = out_tensors_.at(0);
  auto dw_addr = reinterpret_cast<float16_t *>(out_dw->data_c());
  memset(dw_addr, 0, out_dw->Size());
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ConvolutionGradFilterFp16Run, this, context_->thread_num_);
+  int error_code = ParallelLaunch(this->context_, ConvolutionGradFilterFp16Run, this, context_->thread_num_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "conv filter function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_input.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/convolution_fp16_grad_input.cc
@ -176,8 +176,7 @@ int ConvolutionGradInputCPUKernelFp16::Run() {
  auto *out_dx = out_tensors_.at(0);
  auto dx_addr = reinterpret_cast<float16_t *>(out_dx->data_c());
  memset(dx_addr, 0, sizeof(float16_t) * batch * in_ch * in_h * in_w);
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ConvolutionGradInputFp16Run, this, context_->thread_num_);
+  int error_code = ParallelLaunch(this->context_, ConvolutionGradInputFp16Run, this, context_->thread_num_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
@ -82,8 +82,7 @@ int RunDropoutFp16Grad(void *cdata, int task_id, float lhs_scale, float rhs_scal
 }

 int DropoutGradCPUKernelFp16::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(RunDropoutFp16Grad, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, RunDropoutFp16Grad, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Dropout Grad function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
@ -96,8 +96,7 @@ int LayerNormF16GradRun(void *cdata, int task_id, float lhs_scale, float rhs_sca
 }

 int LayerNormGradCPUKernelFp16::Run() {
-  int error_code =
-    static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(LayerNormF16GradRun, this, 1);
+  int error_code = ParallelLaunch(this->context_, LayerNormF16GradRun, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "LayerNorm function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/neg_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/neg_fp16_grad.cc
@ -55,8 +55,7 @@ int NegGradCPUKernelFp16::DoNegGrad(int task_id) {
 int NegGradCPUKernelFp16::ReSize() { return RET_OK; }

 int NegGradCPUKernelFp16::Run() {
-  int ret = static_cast<const lite::InnerContext *>(this->context_)
-              ->thread_pool_->ParallelLaunch(NegGradRun, this, thread_count_);
+  int ret = ParallelLaunch(this->context_, NegGradRun, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "parallel launch fail!ret: " << ret;
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
@ -99,8 +99,7 @@ int PoolingFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rhs_sca

 int PoolingGradCPUKernelFp16::Run() {
  thread_num_ = context_->thread_num_;
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(PoolingFp16GradImpl, this, thread_num_);
+  int error_code = ParallelLaunch(this->context_, PoolingFp16GradImpl, this, thread_num_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
@ -91,8 +91,7 @@ int ResizeGradCPUKernelFp16::Run() {
  auto out_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
  size_t elem_number = out_tensors_.at(0)->ElementsNum();
  std::fill(out_addr, out_addr + elem_number, 0.f);
-  int error_code =
-    static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(ResizeFp16GradRun, this, 1);
+  int error_code = ParallelLaunch(this->context_, ResizeFp16GradRun, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ResizeGradCPUKernelFp16 function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/strided_slice_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/strided_slice_fp16_grad.cc
@ -123,8 +123,7 @@ int StridedSliceFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rh
 }

 int StridedSliceGradCPUKernelFp16::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(StridedSliceFp16GradImpl, this, 1);
+  int error_code = ParallelLaunch(this->context_, StridedSliceFp16GradImpl, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/unsorted_segment_sum_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/unsorted_segment_sum_fp16.cc
@ -67,8 +67,7 @@ int UnsortedSegmentSumFp16Run(void *cdata, int task_id, float lhs_scale, float r
 }

 int UnsortedSegmentSumCPUKernelFp16::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(UnsortedSegmentSumFp16Run, this, 1);
+  int error_code = ParallelLaunch(this->context_, UnsortedSegmentSumFp16Run, this, 1);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
@ -107,8 +107,7 @@ int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int ActivationCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ActivationRun, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, ActivationRun, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
@ -121,8 +121,7 @@ int AdderCPUKernel::Run() {
    return RET_ERROR;
  }

-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(AdderImpl, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, AdderImpl, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "adder error error_code[" << error_code << "]";
    FreeTmpBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
@ -88,8 +88,7 @@ int AddNCPUKernel::Run() {
  in1_addr_ = input0_data;
  in2_addr_ = input1_data;
  out_addr_ = output_data;
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
    return RET_ERROR;
@ -97,8 +96,7 @@ int AddNCPUKernel::Run() {
  for (size_t i = 2; i < in_tensors_.size(); ++i) {
    in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
    in2_addr_ = output_data;
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
      return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
@ -419,8 +419,7 @@ int ArithmeticCPUKernel::Run() {
    input1_ptr_ = in_tensors_[1]->data_c();
  }
  output_ptr_ = out_tensors_[0]->data_c();
-  return static_cast<const lite::InnerContext *>(this->context_)
-    ->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
+  return ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>)
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
@ -114,8 +114,7 @@ int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale
 }

 int ArithmeticSelfCPUKernel::Run() {
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
@ -75,8 +75,7 @@ int BatchnormCPUKernel::InitConstTensor() {
 }

 int BatchnormCPUKernel::Run() {
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
@ -140,8 +140,7 @@ int CastCPUKernel::Run() {
  if (data_num_ == 0) {
    return RET_OK;
  }
-  return static_cast<const lite::InnerContext *>(this->context_)
-    ->thread_pool_->ParallelLaunch(CastRun, this, op_parameter_->thread_num_);
+  return ParallelLaunch(this->context_, CastRun, this, op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>)
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
@ -69,8 +69,7 @@ int ConcatRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int ConcatCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ConcatRun, this, op_parameter_->thread_num_);
+  int error_code = ParallelLaunch(this->context_, ConcatRun, this, op_parameter_->thread_num_);
  return error_code;
 }

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
@ -256,12 +256,10 @@ int Convolution1x1CPUKernel::Run() {
    }

    if (multi_thread_by_hw_) {
-      static_cast<const lite::InnerContext *>(this->context_)
-        ->thread_pool_->ParallelLaunch(Convolution1x1RunHw, this, thread_count_);
+      ParallelLaunch(this->context_, Convolution1x1RunHw, this, thread_count_);
    } else {
      PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
-      static_cast<const lite::InnerContext *>(this->context_)
-        ->thread_pool_->ParallelLaunch(Convolution1x1Run, this, thread_count_);
+      ParallelLaunch(this->context_, Convolution1x1Run, this, thread_count_);
    }
  }

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
@ -133,8 +133,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
  auto output_tensor = out_tensors_.at(kOutputIndex);
  output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
  MS_ASSERT(output_ptr_ != nullptr);
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ConvDw3x3Run, this, conv_param_->thread_num_);
  ctx_->allocator->Free(buffer_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
@ -116,8 +116,7 @@ int ConvolutionDepthwiseCPUKernel::Run() {
  output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
  MS_ASSERT(output_ptr_ != nullptr);

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ConvDwRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
@ -203,8 +203,7 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
  MS_ASSERT(output_ptr_ != nullptr);
  ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w);

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ConvDwIndirectRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ConvDwIndirectRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
@ -171,8 +171,7 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
    packed_output_ = output_ptr;
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvDwSWRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_, ConvDwSWRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
@ -168,8 +168,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() {
    packed_output_ = output_ptr;
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvDwSWAvxRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_, ConvDwSWAvxRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwSWAvxRun error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
@ -151,8 +151,7 @@ int ConvolutionCPUKernel::Run() {
    PackWeight();
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvolutionImpl, this, thread_count_);
+  ret = ParallelLaunch(this->context_, ConvolutionImpl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
@ -183,8 +183,7 @@ int ConvolutionSWCPUKernel::Run() {
    FreeTmpBuffer();
    return ret;
  }
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(ConvolutionSWImpl, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, ConvolutionSWImpl, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
    FreeTmpBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
@ -224,8 +224,7 @@ int ConvolutionWinogradCPUKernel::Run() {
    }
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(ConvolutionWinogradImpl, this, thread_count_);
+  ret = ParallelLaunch(this->context_, ConvolutionWinogradImpl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
@ -158,8 +158,7 @@ int CropAndResizeCPUKernel::Run() {
    return ret;
  }

-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(CropAndResizeImpl, this, op_parameter_->thread_num_);
+  int error_code = ParallelLaunch(this->context_, CropAndResizeImpl, this, op_parameter_->thread_num_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]";
    FreeTmpBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
@ -61,8 +61,7 @@ int CropCPUKernel::Run() {
    return RET_OK;
  }

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(CropLaunch, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(this->context_, CropLaunch, this, crop_para_->thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc
@ -136,8 +136,7 @@ int CumSumCPUKernel::DoCumsumInt(int task_id) {
 }

 int CumSumCPUKernel::Run() {
-  int ret = static_cast<const lite::InnerContext *>(this->context_)
-              ->thread_pool_->ParallelLaunch(CumsumLaunch, this, op_parameter_->thread_num_);
+  int ret = ParallelLaunch(this->context_, CumsumLaunch, this, op_parameter_->thread_num_);

  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
@ -177,8 +177,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
    packed_output_ = output_addr;
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_, DeconvDwRun, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
@ -233,8 +233,7 @@ int DeConvolutionCPUKernel::Run() {
    RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif

-    error_code = static_cast<const lite::InnerContext *>(this->context_)
-                   ->thread_pool_->ParallelLaunch(DeConvFp32Run, this, thread_count_);
+    error_code = ParallelLaunch(this->context_, DeConvFp32Run, this, thread_count_);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
      FreeRunBuf();
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
@ -411,8 +411,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
    nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;

    ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float));
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(DeConvWgFp32Run, this, deconv_param_->thread_num_);
+    ret = ParallelLaunch(this->context_, DeConvWgFp32Run, this, deconv_param_->thread_num_);
    if (ret != RET_OK) {
      FreeRunBuf();
      MS_LOG(ERROR) << "DeConvWgFp32Run failed!";
@ -420,8 +419,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
    }

    /* post bias activate and nhwc */
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(DeConvWgPostFp32Run, this, thread_num_hw_);
+    ret = ParallelLaunch(this->context_, DeConvWgPostFp32Run, this, thread_num_hw_);
    if (ret != RET_OK) {
      FreeRunBuf();
      MS_LOG(ERROR) << "DeConvWgPostFp32Run failed!";
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
@ -58,8 +58,7 @@ int EluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int EluCPUKernel::Run() {
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(EluRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, EluRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
@ -86,8 +86,7 @@ int EmbeddingLookupCPUKernel::Run() {
    memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
    dest_loc += in_tensors_.at(i)->ElementsNum();
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
  FreeRunBuff();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc
@ -72,8 +72,7 @@ int ExpCPUKernel::Run() {
  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
  exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();

-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(ExpRun, this, exp_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, ExpRun, this, exp_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc
@ -90,8 +90,7 @@ int FillCPUKernel::Run() {
    MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type();
    return RET_ERROR;
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(FillRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(this->context_, FillRun, this, thread_sz_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
@ -91,8 +91,7 @@ int FusedBatchnormCPUKernel::Run() {

    trained_ = true;  // trained at least once
  }
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
@ -127,8 +127,7 @@ int GatherNdCPUKernel::Run() {
  in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
  out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
  InitOffset();
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(GatherNdRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(this->context_, GatherNdRun, this, thread_sz_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
@ -91,8 +91,7 @@ int GatherCPUKernel::Run() {
    return ret;
  }

-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(GatherRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, GatherRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
@ -66,8 +66,7 @@ int InstanceNormCPUKernel::Run() {
  gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
  beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c());
  dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(InstanceNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(this->context_, InstanceNormRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
    return ret;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc
@ -146,8 +146,7 @@ int L2NormCPUKernel::Run() {
  int ret;
  if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
    // all axis
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(SquareSumRun, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, SquareSumRun, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
      return RET_ERROR;
@ -157,15 +156,13 @@ int L2NormCPUKernel::Run() {
      sum += tmp_sum_[i];
    }
    sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(L2NormRun, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, L2NormRun, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
      return RET_ERROR;
    }
  } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
      return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
@ -92,8 +92,7 @@ int LayerNormCPUKernel::Run() {
    mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
    var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(LayerNormRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, LayerNormRun, this, op_parameter_->thread_num_);
  if (out_tensors_.size() != 3) {
    context_->allocator->Free(mean_data_);
    context_->allocator->Free(var_data_);
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc
@ -72,8 +72,7 @@ int LocalResponseNormRun(void *cdata, int task_id, float lhs_scale, float rhs_sc
 }

 int LocalResponseNormCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(LocalResponseNormRun, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, LocalResponseNormRun, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/log_softmax_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/log_softmax_fp32.cc
@ -96,8 +96,7 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id, float lhs_scale, float rhs_s
 int LogSoftmaxCPUKernel::Run() {
  int ret = RET_OK;
  if (in_plane_size_ == 1) {
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection_fp32.cc
@ -60,8 +60,7 @@ int LshProjectionCPUKernel::Run() {
  if (ret != RET_OK) {
    return ret;
  }
-  ret = static_cast<const lite::InnerContext *>(this->context_)
-          ->thread_pool_->ParallelLaunch(LshProjectionRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(this->context_, LshProjectionRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
  }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc
@ -426,8 +426,7 @@ int MatmulFp32BaseCPUKernel::Run() {
      // need not aligned
      batch_c_ptr_ = output_data_ + i * params_->row_ * params_->col_;
    }
-    ret = static_cast<const lite::InnerContext *>(this->context_)
-            ->thread_pool_->ParallelLaunch(MatmulBaseFloatRun, this, thread_count_);
+    ret = ParallelLaunch(this->context_, MatmulBaseFloatRun, this, thread_count_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
    }
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc
@ -180,8 +180,7 @@ int OneHotCPUKernel::GetParams() {
 }

 int OneHotCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(RunOneHot, this, op_parameter_->thread_num_);
+  int error_code = ParallelLaunch(this->context_, RunOneHot, this, op_parameter_->thread_num_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
@ -408,8 +408,7 @@ int PadCPUKernel::Run() {
        output_data[i] = pad_param_->constant_value_;
      }
    }
-    error_code = static_cast<const lite::InnerContext *>(this->context_)
-                   ->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
+    error_code = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
      return RET_ERROR;
@ -422,8 +421,7 @@ int PadCPUKernel::Run() {
      return error_code;
    }

-    error_code = static_cast<const lite::InnerContext *>(this->context_)
-                   ->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
+    error_code = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
      return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc
@ -84,8 +84,7 @@ int PoolingImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int PoolingCPUKernel::Run() {
-  int error_code = static_cast<const lite::InnerContext *>(this->context_)
-                     ->thread_pool_->ParallelLaunch(PoolingImpl, this, thread_count_);
+  int error_code = ParallelLaunch(this->context_, PoolingImpl, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc
@ -40,8 +40,7 @@ int PowerImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }

 int PowerCPUKernel::Run() {
-  auto ret = static_cast<const lite::InnerContext *>(this->context_)
-               ->thread_pool_->ParallelLaunch(PowerImpl, this, thread_count_);
+  auto ret = ParallelLaunch(this->context_, PowerImpl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;
    return RET_ERROR;
--- a/Show More
+++ b/Show More