!18311 optimize parallel function call

Merge pull request !18311 from yangjie159/mindrt_thread
i-robot 2021-06-16 09:23:29 +08:00 committed by Gitee
commit fe37f625ca
183 changed files with 222 additions and 409 deletions
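The refactor replaces direct access to InnerContext's public thread_pool_ member with a free helper, ParallelLaunch(context, func, content, task_num), declared in inner_context.h and used by every kernel below. A minimal before/after sketch of the recurring call-site pattern (SomeKernelRun stands in for the per-kernel worker functions such as ConstantOfShapeRun or ReshapeRun):

  // before: each kernel dereferenced the context's public thread pool member
  auto ret = static_cast<const lite::InnerContext *>(this->context_)
               ->thread_pool_->ParallelLaunch(SomeKernelRun, this, thread_count);

  // after: the free helper hides the pool lookup and adds a nullptr check
  auto ret = ParallelLaunch(this->context_, SomeKernelRun, this, thread_count);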


@ -305,4 +305,15 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {
// Support CPU backend to judge whether it supports Float16.
bool InnerContext::IsSupportFloat16() const { return fp16_flag_; }
ActorThreadPool *InnerContext::thread_pool() const { return thread_pool_; }
int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num) {
ActorThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool();
if (pool == nullptr) {
MS_LOG(ERROR) << "thread pool is nullptr";
return RET_NULL_PTR;
}
return pool->ParallelLaunch(func, content, task_num);
}
} // namespace mindspore::lite
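The worker functions handed to this helper throughout the kernel hunks below share the four-argument signature visible further down (for example RunPriorBox and AddNLaunch). A minimal sketch, assuming a hypothetical ExampleCPUKernel, of how a kernel pairs a worker with the new helper:

  // worker: cdata carries the kernel instance, task_id selects this thread's slice of work
  int ExampleRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
    auto kernel = static_cast<ExampleCPUKernel *>(cdata);  // hypothetical kernel class
    return kernel->DoExample(task_id);                     // hypothetical per-task body
  }

  int ExampleCPUKernel::Run() {
    // returns RET_NULL_PTR if the context has no thread pool, otherwise the pool's result
    return ParallelLaunch(this->context_, ExampleRun, this, op_parameter_->thread_num_);
  }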


@ -30,9 +30,6 @@
namespace mindspore::lite {
struct InnerContext : public Context {
public:
ActorThreadPool *thread_pool_{nullptr};
public:
InnerContext() = default;
@ -64,6 +61,8 @@ struct InnerContext : public Context {
int IsValid() const;
ActorThreadPool *thread_pool() const;
virtual ~InnerContext();
private:
@ -83,6 +82,8 @@ struct InnerContext : public Context {
bool fp16_flag_ = false;
ActorThreadPool *thread_pool_{nullptr};
#ifdef ENABLE_ARM
#ifndef MS_COMPILE_IOS
CpuInfo *cpu_info_ = nullptr;
@ -95,6 +96,9 @@ struct InnerContext : public Context {
#endif
#endif
};
int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num);
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_INNER_CONTEXT_H
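With thread_pool_ now a private member, code outside InnerContext obtains the pool through the new thread_pool() accessor and handles a null pool itself, as the CreateOpActor and LiteSession::InitGPURuntime hunks below do. A minimal sketch of that caller-side check, assuming ctx is a lite::InnerContext pointer:

  ActorThreadPool *pool = ctx->thread_pool();
  if (pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return RET_NULL_PTR;  // or the caller's own error path
  }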


@ -323,7 +323,7 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
const lite::InnerContext *ctx) {
std::vector<std::shared_ptr<LiteOpActor>> actors;
std::unordered_map<size_t, AID> partial_map{};
auto thread_pool = ctx->thread_pool_;
auto thread_pool = ctx->thread_pool();
if (thread_pool == nullptr) {
MS_LOG(ERROR) << "thread pool is nullptr";
return actors;


@ -881,7 +881,7 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
}
int LiteSession::InitGPURuntime() {
ActorThreadPool *thread_pool = this->context_->thread_pool_;
ActorThreadPool *thread_pool = this->context_->thread_pool();
if (thread_pool == nullptr) {
MS_LOG(ERROR) << "thread pool is nullptr";
is_running_.store(false);


@ -76,8 +76,7 @@ int ConstantOfShapeCPUKernel::Run() {
}
thread_stride_ = UP_DIV(param_->element_size_, thread_count);
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConstantOfShapeRun, this, thread_count);
auto ret = ParallelLaunch(this->context_, ConstantOfShapeRun, this, thread_count);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]";
return ret;


@ -236,8 +236,7 @@ int DetectionPostProcessBaseCPUKernel::Run() {
return status;
}
} else {
status = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
status = ParallelLaunch(this->context_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
if (status != RET_OK) {
MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
FreeAllocatedBuffer();


@ -165,8 +165,7 @@ int RunPriorBox(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int PriorBoxCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(RunPriorBox, this, thread_count_);
int error_code = ParallelLaunch(this->context_, RunPriorBox, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";
return RET_ERROR;


@ -175,8 +175,7 @@ int QuantDTypeCastCPUKernel::Run() {
uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c());
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(QuantDTypeCastRun, this, thread_n_num_);
auto ret = ParallelLaunch(this->context_, QuantDTypeCastRun, this, thread_n_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&


@ -70,8 +70,7 @@ int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
int ReshapeBaseCPUKernel::Run() {
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ReshapeRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ReshapeRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
return ret;


@ -82,8 +82,7 @@ int SliceCPUKernel::Run() {
lite::DataTypeSize(in_tensors_.at(0)->data_type()));
return RET_OK;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SliceLaunch, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, SliceLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
return RET_ERROR;


@ -125,8 +125,7 @@ int SplitBaseCPUKernel::Run() {
output_ptr_.at(i) = output_tensor->data_c();
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SplitRun, this, thread_n_num_);
auto ret = ParallelLaunch(this->context_, SplitRun, this, thread_n_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "split error error_code[" << ret << "]";
}


@ -117,8 +117,7 @@ int SplitWithOverlapBaseCPUKernel::Run() {
inner_stride_ *= input_shape[i];
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SplitWithOverlapRun, this, context_->thread_num_);
auto ret = ParallelLaunch(this->context_, SplitWithOverlapRun, this, context_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch for SplitWIthOverlapRun run fail. errorcode:[" << ret << "]";
return RET_ERROR;


@ -100,8 +100,7 @@ int StackBaseCPUKernel::Run() {
}
// run stack
num_threads_ = MSMIN(UP_DIV(outer_size_, 64), op_parameter_->thread_num_);
auto ret =
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
auto ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
return RET_ERROR;


@ -162,8 +162,7 @@ int StridedSliceCPUKernel::FastRun() {
}
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(StrideRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, StrideRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
return ret;


@ -128,8 +128,7 @@ int TileCPUKernel::SimpleTileImpl(int task_id) {
}
int TileCPUKernel::RunSimpleTile() {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SimpleTile, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, SimpleTile, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
return ret;


@ -103,8 +103,7 @@ int ActivationFp16CPUKernel::Run() {
fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ActivationFp16Run, this, thread_count_);
int error_code = ParallelLaunch(this->context_, ActivationFp16Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
return RET_ERROR;


@ -88,8 +88,7 @@ int AddNFp16CPUKernel::Run() {
in1_addr_ = input0_data;
in2_addr_ = input1_data;
out_addr_ = out_data;
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
return RET_ERROR;
@ -97,8 +96,7 @@ int AddNFp16CPUKernel::Run() {
for (size_t i = 2; i < in_tensors_.size(); ++i) {
in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData());
in2_addr_ = out_data;
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
return RET_ERROR;


@ -168,8 +168,7 @@ int ArithmeticCompareFP16CPUKernel::Run() {
FreeTmpBuffer();
return RET_ERROR;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticsRunFp16, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ArithmeticsRunFp16, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
}


@ -182,8 +182,7 @@ int ArithmeticFP16CPUKernel::Run() {
FreeFp16Buffer();
return RET_ERROR;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
output_tensor->ElementsNum());


@ -84,8 +84,7 @@ int ArithmeticSelfFp16CPUKernel::Run() {
}
output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
}


@ -63,8 +63,7 @@ int BatchnormFp16CPUKernel::Run() {
return RET_ERROR;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}


@ -131,8 +131,7 @@ int CastFp16CPUKernel::Run() {
if (data_num_ == 0) {
return RET_OK;
}
return static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CastFp16Run, this, op_parameter_->thread_num_);
return ParallelLaunch(this->context_, CastFp16Run, this, op_parameter_->thread_num_);
}
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>)


@ -261,16 +261,14 @@ int Convolution1x1FP16CPUKernel::Run() {
int ret = RET_ERROR;
if (multi_thread_by_hw_) {
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunHw, this, thread_count_);
ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunHw, this, thread_count_);
} else {
#ifdef ENABLE_ARM64
RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
#else
RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
#endif
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunOc, this, thread_count_);
ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunOc, this, thread_count_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed.";


@ -117,8 +117,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
}
is_repack_ = false;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
auto ret = ParallelLaunch(this->context_, ConvDwFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
}


@ -169,8 +169,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
}
is_repack_ = false;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwSWFp16Run, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
}


@ -160,8 +160,7 @@ int ConvolutionFP16CPUKernel::Run() {
}
is_repack_ = false;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionFp16Impl, this, thread_count_);
ret = ParallelLaunch(this->context_, ConvolutionFp16Impl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
}


@ -237,8 +237,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
}
is_repack_ = false;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionWinogradFp16Impl, this, thread_count_);
ret = ParallelLaunch(this->context_, ConvolutionWinogradFp16Impl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
}


@ -52,8 +52,7 @@ int CropFp16CPUKernel::Run() {
input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CropFp16Run, this, crop_para_->thread_count_);
auto ret = ParallelLaunch(this->context_, CropFp16Run, this, crop_para_->thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
}


@ -179,8 +179,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t));
packed_output_ = output_ptr;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_, DeconvDwFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
}


@ -222,8 +222,7 @@ int DeConvolutionFp16CPUKernel::Run() {
RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_);
error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvFp16Run, this, thread_count_);
error_code = ParallelLaunch(this->context_, DeConvFp16Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
}


@ -399,15 +399,13 @@ int DeConvWinogradFp16CPUKernel::Run() {
nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t));
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvWgFp16Run, this, deconv_param_->thread_num_);
auto ret = ParallelLaunch(this->context_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeConvWgFp16Run failed!";
return ret;
}
// post bias activate and nhwc
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvWgPostFp16Run, this, thread_num_hw_);
ret = ParallelLaunch(this->context_, DeConvWgPostFp16Run, this, thread_num_hw_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeConvWgPostFp16Run failed!";
return ret;


@ -147,8 +147,7 @@ int GatherFp16CPUKernel::Run() {
Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
}
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(GatherRunFp16, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, GatherRunFp16, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
}


@ -108,8 +108,7 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
int InstanceNormFp16CPUKernel::Run() {
src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(InstanceNormFp16Run, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
return ret;


@ -95,8 +95,7 @@ int LayerNormFp16CPUKernel::Run() {
var_data_ =
reinterpret_cast<float16_t *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float16_t)));
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LayerNormFp16Run, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, LayerNormFp16Run, this, op_parameter_->thread_num_);
if (out_tensors_.size() != 3) {
context_->allocator->Free(mean_data_);
context_->allocator->Free(var_data_);


@ -95,8 +95,7 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float r
int LogSoftmaxFp16CPUKernel::Run() {
if (in_plane_size_ == 1) {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
}


@ -295,8 +295,7 @@ int MatmulBaseFP16CPUKernel::Run() {
batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(MatmulBaseFP16Run, this, thread_count_);
auto ret = ParallelLaunch(this->context_, MatmulBaseFP16Run, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
return ret;


@ -101,8 +101,7 @@ int PadFp16CPUKernel::Run() {
output_[i] = pad_param_->constant_value_;
}
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}
@ -114,8 +113,7 @@ int PadFp16CPUKernel::Run() {
return ret;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
}


@ -89,8 +89,7 @@ int PoolingFp16CPUKernel::Run() {
fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PoolingFp16Impl, this, thread_count_);
int error_code = ParallelLaunch(this->context_, PoolingFp16Impl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
return RET_ERROR;


@ -86,8 +86,7 @@ int PowerFp16CPUKernel::Run() {
return ret;
}
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PowerImplFp16, this, thread_count_);
auto ret = ParallelLaunch(this->context_, PowerImplFp16, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret;
return RET_ERROR;


@ -163,8 +163,7 @@ int QuantDTypeCastFp16CPUKernel::Run() {
return RET_ERROR;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(QuantDTypeCastFP16Run, this, thread_n_num_);
auto ret = ParallelLaunch(this->context_, QuantDTypeCastFP16Run, this, thread_n_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
return RET_ERROR;


@ -91,8 +91,7 @@ int ReduceFp16CPUKernel::Run() {
outer_size_ = outer_sizes_.at(i);
inner_size_ = inner_sizes_.at(i);
axis_size_ = axis_sizes_.at(i);
auto error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
FreeTmpBuffer();
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
@ -107,8 +106,7 @@ int ReduceFp16CPUKernel::Run() {
outer_size_ = outer_sizes_.back();
inner_size_ = inner_sizes_.back();
axis_size_ = axis_sizes_.back();
auto error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
FreeTmpBuffer();
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";


@ -117,8 +117,7 @@ int ScaleFp16CPUKernel::Run() {
return ret;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ScaleFp16Run, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, ScaleFp16Run, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
FreeTmpBuffer();


@ -63,8 +63,7 @@ int SliceFp16CPUKernel::Run() {
DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16));
return RET_OK;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SliceFp16Launch, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, SliceFp16Launch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret;
return RET_ERROR;


@ -95,8 +95,7 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_
int SoftmaxFp16CPUKernel::Run() {
if (in_plane_size_ == 1) {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
}


@ -101,8 +101,7 @@ int StackFp16CPUKernel::Run() {
}
// run stack
num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->op_parameter_->thread_num_);
ret =
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
return RET_ERROR;


@ -94,8 +94,7 @@ int ActivationGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
}
int ActivationGradCPUKernelFp16::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ActivationGradRunFp16, this, thread_count_);
int error_code = ParallelLaunch(this->context_, ActivationGradRunFp16, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
return RET_ERROR;


@ -75,8 +75,7 @@ int ArithmeticGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
}
int ArithmeticGradCPUKernelFp16::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticGradRunFp16, this, 1);
int error_code = ParallelLaunch(this->context_, ArithmeticGradRunFp16, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic Grad function error error_code[" << error_code << "]";
return RET_ERROR;


@ -72,8 +72,7 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id, float lhs_scale, float r
}
int ArithmeticSelfGradFp16CPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticSelfGradFp16Run, this, thread_count_);
int error_code = ParallelLaunch(this->context_, ArithmeticSelfGradFp16Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
return RET_ERROR;


@ -83,8 +83,7 @@ int BiasGradFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
}
int BiasGradCPUKernelFp16::Run() {
int error_code =
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(BiasGradFp16Run, this, 1);
int error_code = ParallelLaunch(this->context_, BiasGradFp16Run, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
return RET_ERROR;


@ -141,8 +141,7 @@ int BNGradCPUKernelFp16::Run() {
stage_ = 0;
thread_num_ = context_->thread_num_;
if (thread_num_ == 1) {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(BNGradFp16Run, this, thread_num_);
int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
return RET_ERROR;
@ -151,8 +150,7 @@ int BNGradCPUKernelFp16::Run() {
const std::vector<int> threads = {thread_num_, 1, thread_num_};
for (size_t stage = 0; stage < threads.size(); stage++) {
stage_ = static_cast<int>(stage);
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(BNGradFp16Run, this, threads.at(stage));
int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, threads.at(stage));
if (error_code != RET_OK) {
MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
return RET_ERROR;


@ -191,8 +191,7 @@ int ConvolutionGradFilterCPUKernelFp16::Run() {
auto *out_dw = out_tensors_.at(0);
auto dw_addr = reinterpret_cast<float16_t *>(out_dw->data_c());
memset(dw_addr, 0, out_dw->Size());
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionGradFilterFp16Run, this, context_->thread_num_);
int error_code = ParallelLaunch(this->context_, ConvolutionGradFilterFp16Run, this, context_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv filter function error error_code[" << error_code << "]";
return RET_ERROR;


@ -176,8 +176,7 @@ int ConvolutionGradInputCPUKernelFp16::Run() {
auto *out_dx = out_tensors_.at(0);
auto dx_addr = reinterpret_cast<float16_t *>(out_dx->data_c());
memset(dx_addr, 0, sizeof(float16_t) * batch * in_ch * in_h * in_w);
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionGradInputFp16Run, this, context_->thread_num_);
int error_code = ParallelLaunch(this->context_, ConvolutionGradInputFp16Run, this, context_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
return RET_ERROR;


@ -82,8 +82,7 @@ int RunDropoutFp16Grad(void *cdata, int task_id, float lhs_scale, float rhs_scal
}
int DropoutGradCPUKernelFp16::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(RunDropoutFp16Grad, this, thread_count_);
int error_code = ParallelLaunch(this->context_, RunDropoutFp16Grad, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Dropout Grad function error error_code[" << error_code << "]";
return RET_ERROR;


@ -96,8 +96,7 @@ int LayerNormF16GradRun(void *cdata, int task_id, float lhs_scale, float rhs_sca
}
int LayerNormGradCPUKernelFp16::Run() {
int error_code =
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(LayerNormF16GradRun, this, 1);
int error_code = ParallelLaunch(this->context_, LayerNormF16GradRun, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "LayerNorm function error error_code[" << error_code << "]";
return RET_ERROR;


@ -55,8 +55,7 @@ int NegGradCPUKernelFp16::DoNegGrad(int task_id) {
int NegGradCPUKernelFp16::ReSize() { return RET_OK; }
int NegGradCPUKernelFp16::Run() {
int ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(NegGradRun, this, thread_count_);
int ret = ParallelLaunch(this->context_, NegGradRun, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "parallel launch fail!ret: " << ret;
return ret;


@ -99,8 +99,7 @@ int PoolingFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rhs_sca
int PoolingGradCPUKernelFp16::Run() {
thread_num_ = context_->thread_num_;
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PoolingFp16GradImpl, this, thread_num_);
int error_code = ParallelLaunch(this->context_, PoolingFp16GradImpl, this, thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
return RET_ERROR;


@ -91,8 +91,7 @@ int ResizeGradCPUKernelFp16::Run() {
auto out_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
size_t elem_number = out_tensors_.at(0)->ElementsNum();
std::fill(out_addr, out_addr + elem_number, 0.f);
int error_code =
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(ResizeFp16GradRun, this, 1);
int error_code = ParallelLaunch(this->context_, ResizeFp16GradRun, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "ResizeGradCPUKernelFp16 function error error_code[" << error_code << "]";
return RET_ERROR;


@ -123,8 +123,7 @@ int StridedSliceFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rh
}
int StridedSliceGradCPUKernelFp16::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(StridedSliceFp16GradImpl, this, 1);
int error_code = ParallelLaunch(this->context_, StridedSliceFp16GradImpl, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
return RET_ERROR;


@ -67,8 +67,7 @@ int UnsortedSegmentSumFp16Run(void *cdata, int task_id, float lhs_scale, float r
}
int UnsortedSegmentSumCPUKernelFp16::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(UnsortedSegmentSumFp16Run, this, 1);
int error_code = ParallelLaunch(this->context_, UnsortedSegmentSumFp16Run, this, 1);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
return RET_ERROR;


@ -107,8 +107,7 @@ int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int ActivationCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ActivationRun, this, thread_count_);
int error_code = ParallelLaunch(this->context_, ActivationRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
return RET_ERROR;


@ -121,8 +121,7 @@ int AdderCPUKernel::Run() {
return RET_ERROR;
}
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(AdderImpl, this, thread_count_);
int error_code = ParallelLaunch(this->context_, AdderImpl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "adder error error_code[" << error_code << "]";
FreeTmpBuffer();


@ -88,8 +88,7 @@ int AddNCPUKernel::Run() {
in1_addr_ = input0_data;
in2_addr_ = input1_data;
out_addr_ = output_data;
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
return RET_ERROR;
@ -97,8 +96,7 @@ int AddNCPUKernel::Run() {
for (size_t i = 2; i < in_tensors_.size(); ++i) {
in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
in2_addr_ = output_data;
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
return RET_ERROR;


@ -419,8 +419,7 @@ int ArithmeticCPUKernel::Run() {
input1_ptr_ = in_tensors_[1]->data_c();
}
output_ptr_ = out_tensors_[0]->data_c();
return static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
return ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>)


@ -114,8 +114,7 @@ int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale
}
int ArithmeticSelfCPUKernel::Run() {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
}


@ -75,8 +75,7 @@ int BatchnormCPUKernel::InitConstTensor() {
}
int BatchnormCPUKernel::Run() {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}


@ -140,8 +140,7 @@ int CastCPUKernel::Run() {
if (data_num_ == 0) {
return RET_OK;
}
return static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CastRun, this, op_parameter_->thread_num_);
return ParallelLaunch(this->context_, CastRun, this, op_parameter_->thread_num_);
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>)


@ -69,8 +69,7 @@ int ConcatRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int ConcatCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConcatRun, this, op_parameter_->thread_num_);
int error_code = ParallelLaunch(this->context_, ConcatRun, this, op_parameter_->thread_num_);
return error_code;
}


@ -256,12 +256,10 @@ int Convolution1x1CPUKernel::Run() {
}
if (multi_thread_by_hw_) {
static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(Convolution1x1RunHw, this, thread_count_);
ParallelLaunch(this->context_, Convolution1x1RunHw, this, thread_count_);
} else {
PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(Convolution1x1Run, this, thread_count_);
ParallelLaunch(this->context_, Convolution1x1Run, this, thread_count_);
}
}


@ -133,8 +133,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
auto output_tensor = out_tensors_.at(kOutputIndex);
output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
MS_ASSERT(output_ptr_ != nullptr);
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
auto ret = ParallelLaunch(this->context_, ConvDw3x3Run, this, conv_param_->thread_num_);
ctx_->allocator->Free(buffer_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";


@ -116,8 +116,7 @@ int ConvolutionDepthwiseCPUKernel::Run() {
output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
MS_ASSERT(output_ptr_ != nullptr);
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
auto ret = ParallelLaunch(this->context_, ConvDwRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
return RET_ERROR;


@ -203,8 +203,7 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
MS_ASSERT(output_ptr_ != nullptr);
ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w);
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwIndirectRun, this, conv_param_->thread_num_);
auto ret = ParallelLaunch(this->context_, ConvDwIndirectRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]";
return RET_ERROR;


@ -171,8 +171,7 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
packed_output_ = output_ptr;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwSWRun, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_, ConvDwSWRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
}


@ -168,8 +168,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() {
packed_output_ = output_ptr;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvDwSWAvxRun, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_, ConvDwSWAvxRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwSWAvxRun error: error_code[" << ret << "]";
}


@ -151,8 +151,7 @@ int ConvolutionCPUKernel::Run() {
PackWeight();
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionImpl, this, thread_count_);
ret = ParallelLaunch(this->context_, ConvolutionImpl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
}


@ -183,8 +183,7 @@ int ConvolutionSWCPUKernel::Run() {
FreeTmpBuffer();
return ret;
}
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionSWImpl, this, thread_count_);
int error_code = ParallelLaunch(this->context_, ConvolutionSWImpl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
FreeTmpBuffer();


@ -224,8 +224,7 @@ int ConvolutionWinogradCPUKernel::Run() {
}
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ConvolutionWinogradImpl, this, thread_count_);
ret = ParallelLaunch(this->context_, ConvolutionWinogradImpl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
}


@ -158,8 +158,7 @@ int CropAndResizeCPUKernel::Run() {
return ret;
}
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CropAndResizeImpl, this, op_parameter_->thread_num_);
int error_code = ParallelLaunch(this->context_, CropAndResizeImpl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]";
FreeTmpBuffer();


@ -61,8 +61,7 @@ int CropCPUKernel::Run() {
return RET_OK;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CropLaunch, this, crop_para_->thread_count_);
auto ret = ParallelLaunch(this->context_, CropLaunch, this, crop_para_->thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
return RET_ERROR;


@ -136,8 +136,7 @@ int CumSumCPUKernel::DoCumsumInt(int task_id) {
}
int CumSumCPUKernel::Run() {
int ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(CumsumLaunch, this, op_parameter_->thread_num_);
int ret = ParallelLaunch(this->context_, CumsumLaunch, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;


@ -177,8 +177,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
packed_output_ = output_addr;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
ret = ParallelLaunch(this->context_, DeconvDwRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
}


@ -233,8 +233,7 @@ int DeConvolutionCPUKernel::Run() {
RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
#endif
error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvFp32Run, this, thread_count_);
error_code = ParallelLaunch(this->context_, DeConvFp32Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
FreeRunBuf();


@ -411,8 +411,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float));
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvWgFp32Run, this, deconv_param_->thread_num_);
ret = ParallelLaunch(this->context_, DeConvWgFp32Run, this, deconv_param_->thread_num_);
if (ret != RET_OK) {
FreeRunBuf();
MS_LOG(ERROR) << "DeConvWgFp32Run failed!";
@ -420,8 +419,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
}
/* post bias activate and nhwc */
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(DeConvWgPostFp32Run, this, thread_num_hw_);
ret = ParallelLaunch(this->context_, DeConvWgPostFp32Run, this, thread_num_hw_);
if (ret != RET_OK) {
FreeRunBuf();
MS_LOG(ERROR) << "DeConvWgPostFp32Run failed!";


@ -58,8 +58,7 @@ int EluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int EluCPUKernel::Run() {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(EluRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, EluRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
return RET_ERROR;


@ -86,8 +86,7 @@ int EmbeddingLookupCPUKernel::Run() {
memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
dest_loc += in_tensors_.at(i)->ElementsNum();
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(EmbeddingLookupRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
FreeRunBuff();
if (ret != RET_OK) {
MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";


@ -72,8 +72,7 @@ int ExpCPUKernel::Run() {
output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(ExpRun, this, exp_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, ExpRun, this, exp_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
return RET_ERROR;


@ -90,8 +90,7 @@ int FillCPUKernel::Run() {
MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type();
return RET_ERROR;
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(FillRun, this, thread_sz_count_);
auto ret = ParallelLaunch(this->context_, FillRun, this, thread_sz_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
return ret;


@ -91,8 +91,7 @@ int FusedBatchnormCPUKernel::Run() {
trained_ = true; // trained at least once
}
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
}


@ -127,8 +127,7 @@ int GatherNdCPUKernel::Run() {
in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
InitOffset();
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(GatherNdRun, this, thread_sz_count_);
auto ret = ParallelLaunch(this->context_, GatherNdRun, this, thread_sz_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]";
return ret;


@ -91,8 +91,7 @@ int GatherCPUKernel::Run() {
return ret;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(GatherRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, GatherRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
}


@ -66,8 +66,7 @@ int InstanceNormCPUKernel::Run() {
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c());
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(InstanceNormRun, this, op_parameter_->thread_num_);
auto ret = ParallelLaunch(this->context_, InstanceNormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
return ret;


@ -146,8 +146,7 @@ int L2NormCPUKernel::Run() {
int ret;
if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
// all axis
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(SquareSumRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, SquareSumRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
return RET_ERROR;
@ -157,15 +156,13 @@ int L2NormCPUKernel::Run() {
sum += tmp_sum_[i];
}
sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(L2NormRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, L2NormRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
return RET_ERROR;
}
} else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
return RET_ERROR;


@ -92,8 +92,7 @@ int LayerNormCPUKernel::Run() {
mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LayerNormRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, LayerNormRun, this, op_parameter_->thread_num_);
if (out_tensors_.size() != 3) {
context_->allocator->Free(mean_data_);
context_->allocator->Free(var_data_);


@ -72,8 +72,7 @@ int LocalResponseNormRun(void *cdata, int task_id, float lhs_scale, float rhs_sc
}
int LocalResponseNormCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LocalResponseNormRun, this, thread_count_);
int error_code = ParallelLaunch(this->context_, LocalResponseNormRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";
return RET_ERROR;


@ -96,8 +96,7 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id, float lhs_scale, float rhs_s
int LogSoftmaxCPUKernel::Run() {
int ret = RET_OK;
if (in_plane_size_ == 1) {
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
}


@ -60,8 +60,7 @@ int LshProjectionCPUKernel::Run() {
if (ret != RET_OK) {
return ret;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(LshProjectionRun, this, op_parameter_->thread_num_);
ret = ParallelLaunch(this->context_, LshProjectionRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
}


@ -426,8 +426,7 @@ int MatmulFp32BaseCPUKernel::Run() {
// need not aligned
batch_c_ptr_ = output_data_ + i * params_->row_ * params_->col_;
}
ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(MatmulBaseFloatRun, this, thread_count_);
ret = ParallelLaunch(this->context_, MatmulBaseFloatRun, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
}


@ -180,8 +180,7 @@ int OneHotCPUKernel::GetParams() {
}
int OneHotCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(RunOneHot, this, op_parameter_->thread_num_);
int error_code = ParallelLaunch(this->context_, RunOneHot, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";
return RET_ERROR;


@ -408,8 +408,7 @@ int PadCPUKernel::Run() {
output_data[i] = pad_param_->constant_value_;
}
}
error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
error_code = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
return RET_ERROR;
@ -422,8 +421,7 @@ int PadCPUKernel::Run() {
return error_code;
}
error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
error_code = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
return RET_ERROR;


@ -84,8 +84,7 @@ int PoolingImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int PoolingCPUKernel::Run() {
int error_code = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PoolingImpl, this, thread_count_);
int error_code = ParallelLaunch(this->context_, PoolingImpl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
return RET_ERROR;


@ -40,8 +40,7 @@ int PowerImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
}
int PowerCPUKernel::Run() {
auto ret = static_cast<const lite::InnerContext *>(this->context_)
->thread_pool_->ParallelLaunch(PowerImpl, this, thread_count_);
auto ret = ParallelLaunch(this->context_, PowerImpl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;
return RET_ERROR;

Some files were not shown because too many files have changed in this diff.