diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index a861eb8f86..5c89c4a99b 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -166,7 +166,6 @@ if (BUILD_DEVICE) add_compile_definitions(ENABLE_ARM32) endif () if (PLATFORM_ARM64) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") add_compile_definitions(ENABLE_ARM64) if (ENABLE_FP16) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h index 64ffa42fd4..7206c31cfc 100644 --- a/mindspore/lite/include/context.h +++ b/mindspore/lite/include/context.h @@ -20,6 +20,7 @@ #include #include #include "include/ms_tensor.h" +#include "include/thread_pool_config.h" namespace mindspore::lite { /// \brief Allocator defined a memory pool for malloc memory and free memory dynamically. @@ -27,13 +28,6 @@ namespace mindspore::lite { /// \note List public class and interface for reference. class Allocator; -/// \brief CpuBindMode defined for holding bind cpu strategy argument. -enum CpuBindMode { - MID_CPU = -1, /**< bind middle cpu first */ - HIGHER_CPU = 1, /**< bind higher cpu first */ - NO_BIND = 0 /**< no bind */ -}; - /// \brief DeviceType defined for holding user's preferred backend. typedef enum { DT_CPU, /**< CPU device type */ diff --git a/mindspore/lite/include/thread_pool_config.h b/mindspore/lite/include/thread_pool_config.h new file mode 100644 index 0000000000..8a5dead47d --- /dev/null +++ b/mindspore/lite/include/thread_pool_config.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_ +#define MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_ + +/// \brief CpuBindMode defined for holding bind cpu strategy argument. +typedef enum Mode { + MID_CPU = -1, /**< bind middle cpu first */ + HIGHER_CPU = 1, /**< bind higher cpu first */ + NO_BIND = 0 /**< no bind */ +} CpuBindMode; + +/// \brief ThreadPoolId defined for specifying which thread pool to use. +typedef enum Id { + THREAD_POOL_DEFAULT = 0, /**< default thread pool id */ + THREAD_POOL_SECOND = 1, /**< the second thread pool id */ + THREAD_POOL_THIRD = 2, /**< the third thread pool id */ + THREAD_POOL_FOURTH = 3 /**< the fourth thread pool id */ +} ThreadPoolId; + +#endif  // MINDSPORE_LITE_INCLUDE_THREAD_POOL_CONFIG_H_ diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index fbedb15bcf..1fef039dbf 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -3,7 +3,7 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/ms_tensor_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.cc + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c ${CMAKE_CURRENT_SOURCE_DIR}/runtime/workspace_pool.cc ${CMAKE_CURRENT_SOURCE_DIR}/ir/tensor.cc ${CMAKE_CURRENT_SOURCE_DIR}/context.cc diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 7c253852ac..d08ecb7ae9 100644 --- a/mindspore/lite/src/lite_session.cc +++ 
b/mindspore/lite/src/lite_session.cc @@ -247,7 +247,6 @@ std::vector LiteSession::GetInputs() const { retu int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) { MS_EXCEPTION_IF_NULL(this->context_); - SetMaxWokerNum(context_->thread_num_); if (before == nullptr && after == nullptr) { return executor->Run(this->inputs_, this->outputs_, this->kernels_, this->context_->allocator.get()); } else { @@ -264,7 +263,7 @@ int LiteSession::Init(Context *context) { } this->context_->float16_priority = context->float16_priority; this->context_->cpu_bind_mode_ = context->cpu_bind_mode_; - ConfigThreadPool(context->cpu_bind_mode_, context->thread_num_); + ConfigThreadPool(THREAD_POOL_DEFAULT, context->thread_num_, context->cpu_bind_mode_); auto ret = KernelRegistry::GetInstance()->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "KernelRegistry Init Failed."; @@ -283,7 +282,7 @@ int LiteSession::Init(Context *context) { void LiteSession::BindThread(bool if_bind) { if (this->context_->cpu_bind_mode_ != NO_BIND) { - DoAllThreadBind(if_bind, static_cast(this->context_->cpu_bind_mode_)); + BindThreads(THREAD_POOL_DEFAULT, if_bind, this->context_->cpu_bind_mode_); } } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc index 518a7896b9..9cf795c862 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc @@ -153,7 +153,7 @@ int PriorBoxCPUKernel::PriorBoxImpl(int task_id) { return ret; } -int RunPriorBox(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int RunPriorBox(void *cdata, int task_id) { auto prior_box = reinterpret_cast(cdata); auto error_code = prior_box->PriorBoxImpl(task_id); @@ -170,7 +170,7 @@ int PriorBoxCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail! 
Ret error code[" << prepare_ret << "]"; return prepare_ret; } - int error_code = LiteBackendParallelLaunch(RunPriorBox, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, RunPriorBox, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc index 60287713e2..a1ba123cf5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc @@ -95,7 +95,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { return RET_OK; } -int QuantDTypeCastRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int QuantDTypeCastRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->QuantDTypeCast(task_id); if (ret != RET_OK) { @@ -119,7 +119,7 @@ int QuantDTypeCastCPUKernel::Run() { int8_ptr_ = reinterpret_cast(out_tensors_[0]->Data()); } - auto ret = LiteBackendParallelLaunch(QuantDTypeCastRun, this, thread_n_num_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, QuantDTypeCastRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc index b2fbf6e81d..46dc6dc63e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc @@ -92,7 +92,7 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) { return error_code; } -int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ActivationRun(void *cdata, int task_id) { auto activation_kernel = reinterpret_cast(cdata); auto error_code = 
activation_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -115,7 +115,7 @@ int ActivationFp16CPUKernel::Run() { return ret; } - int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index 7cb45b00f0..69521196e4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -362,7 +362,7 @@ int ArithmeticFP16CPUKernel::DoArithmetic(int task_id) { return RET_OK; } -static int ArithmeticsRun_Fp16(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ArithmeticsRun_Fp16(void *cdata, int task_id) { auto arithmetic_kernel = reinterpret_cast(cdata); auto error_code = arithmetic_kernel->DoArithmetic(task_id); if (error_code != RET_OK) { @@ -413,7 +413,7 @@ int ArithmeticFP16CPUKernel::Run() { Float32ToFloat16(reinterpret_cast(in_tensors_[1]->Data()), input1_fp16_, arithmeticParameter_->in_elements_num1_); } - ret = LiteBackendParallelLaunch(ArithmeticsRun_Fp16, this, context_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsRun_Fp16, this, context_->thread_num_); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc index 8faee1f705..8805e384a8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc @@ -65,7 +65,7 @@ int BatchnormFp16CPUKernel::Run() { input_ = in_tensors_.at(0)->Data(); output_ = out_tensors_.at(0)->Data(); } - ret = LiteBackendParallelLaunch(BatchNormRun, this, 
op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc index d67ab064d8..da776d2cc9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc @@ -30,13 +30,13 @@ using mindspore::schema::PrimitiveType_Cast; namespace mindspore::kernel { namespace { -int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int CastRun(void *cdata, int task_id) { if (cdata == nullptr) { MS_LOG(ERROR) << "input cdata is nullptr!"; return RET_ERROR; } - return reinterpret_cast(cdata)->DoCast(thread_id); + return reinterpret_cast(cdata)->DoCast(task_id); } } // namespace @@ -91,7 +91,7 @@ int CastFp16CPUKernel::Run() { if (data_num_ == 0) { return RET_OK; } - return LiteBackendParallelLaunch(CastRun, this, op_parameter_->thread_num_); + return ParallelLaunch(THREAD_POOL_DEFAULT, CastRun, this, op_parameter_->thread_num_); } kernel::LiteKernel *CpuCastFp16KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 2ccc2957fe..1708b11105 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -194,7 +194,7 @@ int Convolution1x1FP16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int Convolution1x1Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int Convolution1x1Fp16Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -222,7 +222,7 @@ int Convolution1x1FP16CPUKernel::Run() 
{ execute_input_ + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, execute_output_ + batch_index * matmul_param_->row_ * matmul_param_->col_); - int error_code = LiteBackendParallelLaunch(Convolution1x1Fp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Fp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv1x1 fp16 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc index 8c9343ba3f..5dff52083a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc @@ -197,7 +197,7 @@ int Convolution3x3FP16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int Convolution3x3Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int Convolution3x3Fp16Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -251,7 +251,7 @@ int Convolution3x3FP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; PackNHWCToNHWC8Fp16(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = LiteBackendParallelLaunch(Convolution3x3Fp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Fp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 fp16 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 96f2b3d9b5..fadaa906a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -98,7 +98,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { return RET_OK; } -static int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ConvDwFp16Run(void *cdata, int task_id) { auto conv_dw_fp16 = reinterpret_cast(cdata); auto ret = conv_dw_fp16->Execute(task_id); if (ret != RET_OK) { @@ -125,7 +125,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { return ret; } - ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index a7ebff8c6e..4e8aa956f8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -129,7 +129,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) { return RET_OK; } -static int ConvDwSWFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ConvDwSWFp16Run(void *cdata, int task_id) { auto conv_dw_fp16 = reinterpret_cast(cdata); auto ret = conv_dw_fp16->Execute(task_id); if (ret != RET_OK) { @@ -171,7 +171,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { packed_output_ = execute_output_; } - ret = LiteBackendParallelLaunch(ConvDwSWFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwSWFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc 
b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index ee040f8443..7f7ee28625 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -177,7 +177,7 @@ int ConvolutionFP16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int ConvolutionFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ConvolutionFp16Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -206,7 +206,7 @@ int ConvolutionFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; convert_func_(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = LiteBackendParallelLaunch(ConvolutionFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc index 7b7d1e17b3..27def2e8fb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc @@ -186,7 +186,7 @@ int ConvolutionSWFP16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int ConvolutionSWFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ConvolutionSWFp16Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -219,7 +219,7 @@ int ConvolutionSWFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; convert_func_(reinterpret_cast(execute_input_), nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = 
LiteBackendParallelLaunch(ConvolutionSWFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionSWFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index cb7526f827..4cb862a43f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -347,7 +347,7 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int ConvolutionWinogradFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -404,7 +404,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() { int in_channel = conv_param_->input_channel_; PackNHWCToNHWC8Fp16(execute_input_, nhwc4_input_, in_batch, in_h * in_w, in_channel); - int error_code = LiteBackendParallelLaunch(ConvolutionWinogradFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionWinogradFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 25656b1b4f..8018f43f63 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -137,7 +137,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { return RET_OK; } -static int 
DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int DeconvDwFp16Run(void *cdata, int task_id) { auto deconv_dw_fp16 = reinterpret_cast(cdata); auto ret = deconv_dw_fp16->Execute(task_id); if (ret != RET_OK) { @@ -178,7 +178,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { if (!need_align_) { packed_output_ = execute_output_; } - ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 0deb852cf9..817bb91497 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -137,7 +137,7 @@ void DeConvolutionFp16CPUKernel::FreeRunBuf() { return; } -static int DeConvFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int DeConvFp16Run(void *cdata, int task_id) { auto deconv = reinterpret_cast(cdata); auto error_code = deconv->DoDeconv(task_id); if (error_code != RET_OK) { @@ -188,7 +188,7 @@ int DeConvolutionFp16CPUKernel::Run() { for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { RowMajor2Col16MajorFp16(execute_input_, pack_input_, input_plane_, conv_param_->input_channel_); - error_code = LiteBackendParallelLaunch(DeConvFp16Run, this, thread_count_); + error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! 
error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc index dddad9ae9b..7e0ace1d5e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc @@ -89,7 +89,7 @@ int PoolingFp16CPUKernel::RunImpl(int task_id) { return RET_OK; } -static int PoolingFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int PoolingFp16Impl(void *cdata, int task_id) { auto pooling = reinterpret_cast(cdata); auto error_code = pooling->RunImpl(task_id); if (error_code != RET_OK) { @@ -109,7 +109,7 @@ int PoolingFp16CPUKernel::Run() { auto input_ptr = reinterpret_cast(in_tensors_.at(kInputIndex)->Data()); Float32ToFloat16(input_ptr, fp16_input_, ele_num); - int error_code = LiteBackendParallelLaunch(PoolingFp16Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc index 3bc9d21ada..5b689b0595 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc @@ -67,7 +67,7 @@ int ReduceFp16CPUKernel::CallReduceUnit(int task_id) { return ret; } -static int ReduceImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int ReduceImpl(void *cdata, int task_id) { auto reduce = reinterpret_cast(cdata); auto error_code = reduce->CallReduceUnit(task_id); if (error_code != RET_OK) { @@ -112,7 +112,7 @@ int ReduceFp16CPUKernel::Run() { inner_size_ *= tmp_shape_[k]; } axis_size_ = tmp_shape_[axis]; - auto error_code = LiteBackendParallelLaunch(ReduceImpl, this, context_->thread_num_); + auto 
error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceImpl, this, context_->thread_num_); if (error_code != RET_OK) { FreeTmpBuffer(); MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc index 8182d75f28..3a4e9f41cc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc @@ -63,7 +63,7 @@ int SplitFp16CPUKernel::Split(int task_id) { return RET_OK; } -static int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int SplitRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->Split(task_id); if (ret != RET_OK) { @@ -97,7 +97,7 @@ int SplitFp16CPUKernel::Run() { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->Data()); } } - ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "split error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc index 20e15f0fbf..eca0714e18 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc @@ -117,7 +117,7 @@ int TransposeFp16CPUKernel::TransposeParallel(int task_id) { return RET_OK; } -static int TransposeRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +static int TransposeRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->TransposeParallel(task_id); if (ret != RET_OK) { @@ -162,7 +162,7 @@ int TransposeFp16CPUKernel::Run() { in_shape_ = const_cast(in_tensor->shape().data()); out_shape_ = const_cast(out_tensor->shape().data()); - ret = 
LiteBackendParallelLaunch(TransposeRun, this, thread_h_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, TransposeRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; FreeFp16Buffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc index 41b76206a9..3b61a0c7ca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc @@ -67,7 +67,7 @@ int ActivationCPUKernel::DoActivation(int task_id) { return RET_OK; } -int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ActivationRun(void *cdata, int task_id) { auto activation_kernel = reinterpret_cast(cdata); auto error_code = activation_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -83,7 +83,7 @@ int ActivationCPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return ret; } - int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc index 67e1b24697..c5cb1b6d07 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc @@ -28,13 +28,13 @@ using mindspore::schema::PrimitiveType_AddN; namespace mindspore::kernel { namespace { -int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int AddNLaunch(void *cdata, int task_id) { if (cdata == nullptr) { MS_LOG(ERROR) << "Input cdata is nullptr!"; return RET_NULL_PTR; } auto kernel = reinterpret_cast(cdata); - return kernel->AddNParallelRun(thread_id); + return kernel->AddNParallelRun(task_id); } } // 
namespace @@ -74,7 +74,7 @@ int AddNCPUKernel::Run() { in1_addr_ = input0_data; in2_addr_ = input1_data; out_addr_ = output_data; - ret = LiteBackendParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddNLaunch, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "addn launch fail!ret: " << ret; return RET_ERROR; @@ -82,7 +82,7 @@ int AddNCPUKernel::Run() { for (size_t i = 2; i < in_tensors_.size(); ++i) { in1_addr_ = reinterpret_cast(in_tensors_[i]->Data()); in2_addr_ = output_data; - ret = LiteBackendParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddNLaunch, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc index d3c322744a..6a72842ce5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc @@ -163,7 +163,7 @@ int ArithmeticCPUKernel::DoArithmetic(int task_id) { return RET_OK; } -int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ArithmeticsRun(void *cdata, int task_id) { auto arithmetic_kernel = reinterpret_cast(cdata); auto error_code = arithmetic_kernel->DoArithmetic(task_id); if (error_code != RET_OK) { @@ -193,7 +193,7 @@ int ArithmeticCPUKernel::Run() { ComputeStrides(arithmeticParameter_->out_shape_, arithmeticParameter_->out_strides_, arithmeticParameter_->ndim_); } - int error_code = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Arithmetic function error error_code[" << error_code << "]"; diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc index 57fd294072..75d568b609 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc @@ -41,7 +41,7 @@ int ArithmeticSelfCPUKernel::ReSize() { return RET_OK; } -int ArithmeticSelfRuns(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ArithmeticSelfRuns(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoArithmeticSelf(task_id); if (ret != RET_OK) { @@ -80,7 +80,7 @@ int ArithmeticSelfCPUKernel::Run() { auto out_tensor = out_tensors_.at(0); in_ptr_ = reinterpret_cast(input_tensor->Data()); out_ptr_ = reinterpret_cast(out_tensor->Data()); - ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticSelfRuns, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc index 6bfa90c763..050b868d63 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc @@ -75,7 +75,7 @@ int BatchnormCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail! 
Ret error code: " << ret; return ret; } - ret = LiteBackendParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } @@ -88,7 +88,7 @@ int BatchnormCPUKernel::DoExecute(int task_id) { return mindspore::lite::RET_OK; } -int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int BatchNormRun(void *cdata, int task_id) { auto kernel = reinterpret_cast(cdata); auto ret = kernel->DoExecute(task_id); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h index 3261f4a06f..e759058618 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h @@ -48,7 +48,7 @@ class BatchnormCPUKernel : public LiteKernel { void *variance_ = nullptr; }; -int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int BatchNormRun(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCHNORM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc index 2e984644bf..4d10d0fb81 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc @@ -30,13 +30,13 @@ using mindspore::schema::PrimitiveType_Cast; namespace mindspore::kernel { namespace { -int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int CastRun(void *cdata, int task_id) { if (cdata == nullptr) { MS_LOG(ERROR) << "input cdata is nullptr!"; return RET_ERROR; } - return reinterpret_cast(cdata)->DoCast(thread_id); + return reinterpret_cast(cdata)->DoCast(task_id); } } // namespace @@ -111,7 +111,7 @@ int CastCPUKernel::Run() { if (data_num_ == 0) { return RET_OK; } 
- return LiteBackendParallelLaunch(CastRun, this, op_parameter_->thread_num_); + return ParallelLaunch(THREAD_POOL_DEFAULT, CastRun, this, op_parameter_->thread_num_); } kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc index 54d49ef017..b3330d9479 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/constant_of_shape.cc @@ -41,7 +41,7 @@ int ConstantOfShapeCPUKernel::DoExecute(int task_id) { return RET_OK; } -int ConstantOfShapeRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConstantOfShapeRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoExecute(task_id); if (ret != RET_OK) { @@ -62,7 +62,7 @@ int ConstantOfShapeCPUKernel::Run() { param_->unit_ = UP_DIV(param_->element_sz_, thread_num); param_->op_parameter_.thread_num_ = thread_num; out_ptr_ = reinterpret_cast(out_tensors_.front()->Data()); - auto ret = LiteBackendParallelLaunch(ConstantOfShapeRun, this, thread_num); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConstantOfShapeRun, this, thread_num); if (ret != RET_OK) { MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc index 641a8570f3..32d9c6e3fa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc @@ -172,7 +172,7 @@ int ConvolutionCPUKernel::RunImpl(int task_id) { return RET_OK; } -int ConvolutionImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvolutionImpl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -200,7 
+200,7 @@ int ConvolutionCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(ConvolutionImpl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc index f4c63ec4b7..56e1cbe492 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc @@ -149,7 +149,7 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) { return RET_OK; } -int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int Convolution1x1Run(void *cdata, int task_id) { auto conv1x1 = reinterpret_cast(cdata); auto error_code = conv1x1->DoConv1x1(task_id); if (error_code != RET_OK) { @@ -179,7 +179,7 @@ int Convolution1x1CPUKernel::Run() { Pre1x1Trans(src_in + batch_index * conv_param_->input_h_ * conv_param_->input_w_ * conv_param_->input_channel_, src_out + batch_index * matmul_param_->row_ * matmul_param_->col_); - int error_code = LiteBackendParallelLaunch(Convolution1x1Run, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv1x1 strassen error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc index fbc15eb739..9d384c0f1e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc @@ -212,7 
+212,7 @@ int Convolution3x3CPUKernel::RunImpl(int task_id) { return RET_OK; } -int Convolution3x3Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int Convolution3x3Impl(void *cdata, int task_id) { auto conv3x3 = reinterpret_cast(cdata); auto error_code = conv3x3->RunImpl(task_id); if (error_code != RET_OK) { @@ -262,7 +262,7 @@ int Convolution3x3CPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(Convolution3x3Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc index fe11e5fc1b..53ea4cf09f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc @@ -89,7 +89,7 @@ int ConvolutionDepthwiseCPUKernel::Execute(int task_id) { return RET_OK; } -int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvDwRun(void *cdata, int task_id) { auto conv_dw = reinterpret_cast(cdata); auto ret = conv_dw->Execute(task_id); if (ret != RET_OK) { @@ -116,7 +116,7 @@ int ConvolutionDepthwiseCPUKernel::Run() { auto output_tensor = out_tensors_.at(kOutputIndex); output_ptr_ = reinterpret_cast(output_tensor->Data()); - ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc index c21e5c53f2..10ed18bb03 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow.cc @@ -123,7 +123,7 @@ int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { return RET_OK; } -int ConvDwSWRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvDwSWRun(void *cdata, int task_id) { auto conv_dw = reinterpret_cast(cdata); auto ret = conv_dw->Execute(task_id); if (ret != RET_OK) { @@ -167,7 +167,7 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { packed_output_ = output_ptr; } - ret = LiteBackendParallelLaunch(ConvDwSWRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwSWRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc index 11c561133c..8489e8151c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc @@ -159,7 +159,7 @@ int ConvolutionSWCPUKernel::RunImpl(int task_id) { return RET_OK; } -int ConvolutionSWImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvolutionSWImpl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -187,7 +187,7 @@ int ConvolutionSWCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(ConvolutionSWImpl, 
this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionSWImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc index ad5596d052..d31f1059de 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc @@ -343,7 +343,7 @@ int ConvolutionWinogradCPUKernel::RunImpl(int task_id) { return RET_OK; } -int ConvolutionWinogradImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvolutionWinogradImpl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -395,7 +395,7 @@ int ConvolutionWinogradCPUKernel::Run() { PackNHWCToNHWC4Fp32(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(ConvolutionWinogradImpl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionWinogradImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc index b8c4bca55f..711db31678 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc @@ -30,13 +30,13 @@ using mindspore::schema::PrimitiveType_Crop; namespace mindspore::kernel { namespace { -int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int CropLaunch(void *cdata, int task_id) { if (cdata == nullptr) { MS_LOG(ERROR) << "Input cdata is nullptr!"; return 
RET_NULL_PTR; } auto kernel = reinterpret_cast(cdata); - return kernel->CropParallelRun(thread_id); + return kernel->CropParallelRun(task_id); } } // namespace @@ -68,7 +68,7 @@ int CropCPUKernel::Run() { return RET_OK; } - auto ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, CropLaunch, this, param->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc index 82eaca56ef..bc831df25a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution.cc @@ -109,7 +109,7 @@ int DeConvolutionCPUKernel::InitParam() { return RET_OK; } -int DeConvFp32Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int DeConvFp32Run(void *cdata, int task_id) { auto deconv = reinterpret_cast(cdata); auto error_code = deconv->DoDeconv(task_id); if (error_code != RET_OK) { @@ -194,7 +194,7 @@ int DeConvolutionCPUKernel::Run() { RowMajor2Col12Major(input_ptr_, pack_input_, input_plane_, conv_param_->input_channel_); - error_code = LiteBackendParallelLaunch(DeConvFp32Run, this, thread_count_); + error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvFp32Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp32 run error! 
error_code[" << error_code << "]"; return error_code; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc index 844a239b9f..10a097a047 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise.cc @@ -134,7 +134,7 @@ int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { return RET_OK; } -int DeconvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int DeconvDwRun(void *cdata, int task_id) { auto deconv_dw = reinterpret_cast(cdata); auto ret = deconv_dw->Execute(task_id); if (ret != RET_OK) { @@ -178,7 +178,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() { packed_output_ = output_addr; } - ret = LiteBackendParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwRun, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc index 09636fccfb..bd54b2e2be 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu.cc @@ -46,7 +46,7 @@ int EluCPUKernel::DoExcute(int task_id) { return RET_OK; } -int EluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int EluRun(void *cdata, int task_id) { auto EluData = reinterpret_cast(cdata); auto ret = EluData->DoExcute(task_id); if (ret != RET_OK) { @@ -65,7 +65,7 @@ int EluCPUKernel::Run() { input_addr = reinterpret_cast(in_tensors_.front()->Data()); output_addr = reinterpret_cast(out_tensors_.front()->Data()); - auto ret = LiteBackendParallelLaunch(EluRun, this, elu_parameter_->thread_num_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, EluRun, this, elu_parameter_->thread_num_); if (ret != RET_OK) { 
MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc index ee0e316035..ef832f6257 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc @@ -61,7 +61,7 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) { return RET_OK; } -int EmbeddingLookupRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int EmbeddingLookupRun(void *cdata, int task_id) { auto EmbeddingLookupData = reinterpret_cast(cdata); auto ret = EmbeddingLookupData->DoExcute(task_id); if (ret != RET_OK) { @@ -102,7 +102,7 @@ int EmbeddingLookupCPUKernel::Run() { output_addr_ = reinterpret_cast(out_tensors_.front()->Data()); ids_addr_ = reinterpret_cast(in_tensors_.back()->Data()); - auto ret = LiteBackendParallelLaunch(EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); context_->allocator->Free(input_addr_); context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc index d196bc0a51..3a49462bb2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims.cc @@ -56,7 +56,7 @@ int ExpandDimsCPUKernel::DoExpandDims(int task_id) { return RET_OK; } -int ExpandDimsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ExpandDimsRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoExpandDims(task_id); if (ret != RET_OK) { @@ -74,7 +74,7 @@ int ExpandDimsCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->Data()); out_ptr_ = 
reinterpret_cast(out_tensors_.at(0)->Data()); - auto ret = LiteBackendParallelLaunch(ExpandDimsRun, this, thread_sz_count_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, ExpandDimsRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ExpandDimsRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc index 561d92b6ee..3ae36bf99d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill.cc @@ -56,7 +56,7 @@ int FillCPUKernel::DoFill(int task_id) { return RET_OK; } -int FillRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int FillRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoFill(task_id); if (ret != RET_OK) { @@ -77,7 +77,7 @@ int FillCPUKernel::Run() { auto fill_data = reinterpret_cast(fillData->Data()); src_data_ = fill_data[0]; out_ptr_ = reinterpret_cast(output->Data()); - auto ret = LiteBackendParallelLaunch(FillRun, this, thread_sz_count_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, FillRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc index 2aaea20be8..226f609a98 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection.cc @@ -94,7 +94,7 @@ void FullconnectionCPUKernel::InitMatrixB(float *src_ptr, float *dst_ptr) { RowMajor2Col8Major(src_ptr, dst_ptr, fc_param_->col_, fc_param_->deep_); } -int FcFp32MatmulRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int FcFp32MatmulRun(void *cdata, int task_id) { auto fc = reinterpret_cast(cdata); auto error_code = fc->DoMatmul(task_id); if (error_code != RET_OK) { @@ -129,7 
+129,7 @@ int FullconnectionCPUKernel::Run() { if (!fc_param_->a_const_) InitMatrixA(a_ptr, a_c12_ptr_); if (!fc_param_->b_const_) InitMatrixB(b_ptr, b_r8_ptr_); - LiteBackendParallelLaunch(FcFp32MatmulRun, this, thread_count_); + ParallelLaunch(THREAD_POOL_DEFAULT, FcFp32MatmulRun, this, thread_count_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc index cd44d271c0..3a2a77497f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather.cc @@ -89,7 +89,7 @@ int GatherCPUKernel::DoGather(int task_id) { return error_code; } -int GatherRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int GatherRun(void *cdata, int task_id) { auto gather_kernel = reinterpret_cast(cdata); auto error_code = gather_kernel->DoGather(task_id); if (error_code != RET_OK) { @@ -112,7 +112,7 @@ int GatherCPUKernel::Run() { context_->allocator->Free(indices_data_); return RET_ERROR; } - int error_code = LiteBackendParallelLaunch(GatherRun, this, op_parameter_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, GatherRun, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]"; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc index 518d74589e..961178e734 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd.cc @@ -105,7 +105,7 @@ int GatherNdCPUKernel::DoGatherNd(int task_id) { return RET_OK; } -int GatherNdRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int GatherNdRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoGatherNd(task_id); if (ret != RET_OK) { @@ -123,7 +123,7 @@ int GatherNdCPUKernel::Run() { } in_ptr_ = 
reinterpret_cast(in_tensors_.front()->Data()); out_ptr_ = reinterpret_cast(out_tensors_.front()->Data()); - auto ret = LiteBackendParallelLaunch(GatherNdRun, this, thread_sz_count_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, GatherNdRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc index df623ff0e3..ec2a4cbc26 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/leaky_relu.cc @@ -30,7 +30,7 @@ using mindspore::schema::PrimitiveType_Prelu; namespace mindspore::kernel { namespace { -int LeakyReluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int LeakyReluRun(void *cdata, int task_id) { auto kernel_relu = reinterpret_cast(cdata); auto ret = kernel_relu->DoExcute(task_id); if (ret != RET_OK) { @@ -66,7 +66,7 @@ int LeakyReluCPUKernel::Run() { input_data = reinterpret_cast(input->Data()); output_data = reinterpret_cast(out_tensors_.at(0)->Data()); - auto ret = LiteBackendParallelLaunch(LeakyReluRun, this, context_->thread_num_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, LeakyReluRun, this, context_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PReluDwRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc index 85cc36f414..15de35e18b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm.cc @@ -63,7 +63,7 @@ int LocalResponseNormCPUKernel::DoLocalResponseNorm(int task_id) { return RET_OK; } -int LocalResponseNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int LocalResponseNormRun(void *cdata, int task_id) { auto lrn = 
reinterpret_cast(cdata); auto error_code = lrn->DoLocalResponseNorm(task_id); if (error_code != RET_OK) { @@ -79,7 +79,7 @@ int LocalResponseNormCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = LiteBackendParallelLaunch(LocalResponseNormRun, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, LocalResponseNormRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc index c88a6423bd..61392a80a4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul.cc @@ -147,7 +147,7 @@ int MatmulCPUKernel::RunImpl(int task_id) { return RET_OK; } -int MatmulFloatRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int MatmulFloatRun(void *cdata, int task_id) { auto op = reinterpret_cast(cdata); auto error_code = op->RunImpl(task_id); if (error_code != RET_OK) { @@ -178,7 +178,7 @@ int MatmulCPUKernel::Run() { a_ptr_ = a_c12_ptr_ + i * params_->row_12_ * params_->deep_; b_ptr_ = b_r8_ptr_ + i * params_->deep_ * params_->col_8_; c_ptr_ = c_src + i * params_->row_ * params_->col_; - LiteBackendParallelLaunch(MatmulFloatRun, this, thread_count_); + ParallelLaunch(THREAD_POOL_DEFAULT, MatmulFloatRun, this, thread_count_); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc index 757482f02a..c0bb5f87eb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot.cc @@ -81,7 +81,7 @@ int OneHotCPUKernel::ReSize() { return RET_OK; } -int RunOneHot(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int RunOneHot(void *cdata, int task_id) { auto 
onehot_kernel = reinterpret_cast(cdata); if (onehot_kernel == nullptr) { MS_LOG(ERROR) << "cast OneHotCPUKernel failed"; @@ -166,7 +166,7 @@ int OneHotCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = LiteBackendParallelLaunch(RunOneHot, this, context_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, RunOneHot, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc index 012f4ab4f4..51d9e9d1a1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad.cc @@ -68,7 +68,7 @@ int PadCPUKernel::ReSize() { return RET_OK; } -int PadImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PadImpl(void *cdata, int task_id) { auto padKernel = reinterpret_cast(cdata); int error_code = padKernel->RunImpl(task_id); if (error_code != NNACL_OK) { @@ -102,7 +102,7 @@ int PadCPUKernel::Run() { auto output_data = reinterpret_cast(output->Data()); memset(output_data, 0, output_size * sizeof(float)); - int error_code = LiteBackendParallelLaunch(PadImpl, this, context_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PadImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc index 61009096d5..1bcfcaff33 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling.cc @@ -78,7 +78,7 @@ int PoolingCPUKernel::RunImpl(int task_id) { return RET_OK; } -int PoolingImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int 
PoolingImpl(void *cdata, int task_id) { auto pooling = reinterpret_cast(cdata); auto error_code = pooling->RunImpl(task_id); if (error_code != RET_OK) { @@ -94,7 +94,7 @@ int PoolingCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = LiteBackendParallelLaunch(PoolingImpl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc index 61212e207d..4b1cef6fdb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power.cc @@ -30,7 +30,7 @@ int PowerCPUKernel::Init() { return RET_OK; } int PowerCPUKernel::ReSize() { return RET_OK; } -int PowerImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PowerImpl(void *cdata, int task_id) { auto kernel = reinterpret_cast(cdata); auto ret = kernel->RunImpl(task_id); if (ret != RET_OK) { @@ -46,7 +46,7 @@ int PowerCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - auto ret = LiteBackendParallelLaunch(PowerImpl, this, thread_count_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, PowerImpl, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "PowerCPUKernel error: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc index 337f9edb6b..b28248611a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu.cc @@ -28,7 +28,7 @@ using mindspore::schema::PrimitiveType_CaffePReLU; namespace mindspore::kernel { namespace { -int PReluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PReluRun(void *cdata, 
int task_id) { auto PRelu = reinterpret_cast(cdata); auto ret = PRelu->DoExcute(task_id); if (ret != RET_OK) { @@ -135,7 +135,7 @@ int PReluCPUKernel::Run() { auto negative_slope_tensor = in_tensors_.at(1); prelu_param_->slope_ = reinterpret_cast(negative_slope_tensor->Data()); - auto ret = LiteBackendParallelLaunch(PReluRun, this, prelu_param_->op_parameter_.thread_num_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, PReluRun, this, prelu_param_->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]"; context_->allocator->Free(input_data_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc index aaf0b5a5cd..27125a0d4e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce.cc @@ -95,7 +95,7 @@ int ReduceCPUKernel::CallReduceUnit(int task_id) { return ret; } -int ReduceImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ReduceImpl(void *cdata, int task_id) { auto reduce = reinterpret_cast(cdata); auto error_code = reduce->CallReduceUnit(task_id); if (error_code != RET_OK) { @@ -125,7 +125,7 @@ int ReduceCPUKernel::Run() { inner_size_ *= tmp_shape_[k]; } axis_size_ = tmp_shape_[axis]; - auto error_code = LiteBackendParallelLaunch(ReduceImpl, this, context_->thread_num_); + auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; return RET_ERROR; @@ -145,7 +145,7 @@ int ReduceCPUKernel::Run() { } axis_size_ = tmp_shape_[last_reduce_axis]; dst_data_ = reinterpret_cast(out_tensors_.at(0)->Data()); - auto error_code = LiteBackendParallelLaunch(ReduceImpl, this, context_->thread_num_); + auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << 
"Reduce run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc index b4b3c360a1..598284768a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc @@ -38,7 +38,7 @@ int ResizeCPUKernel::Init() { return ReSize(); } -int ResizeImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ResizeImpl(void *cdata, int task_id) { auto resize = reinterpret_cast(cdata); auto error_code = resize->RunImpl(task_id); if (error_code != RET_OK) { @@ -94,7 +94,7 @@ int ResizeCPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = LiteBackendParallelLaunch(ResizeImpl, this, context_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ResizeImpl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc index e61ff43cb8..4eb82488cc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse.cc @@ -100,7 +100,7 @@ int ReverseCPUKernel::Init() { return ret; } -int ReverseRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ReverseRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoReverse(task_id); if (ret != RET_OK) { @@ -132,7 +132,7 @@ int ReverseCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_[0]->Data()); out_ptr_ = reinterpret_cast(out_tensors_[0]->Data()); - ret = LiteBackendParallelLaunch(ReverseRun, this, thread_sz_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ReverseRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "Reverse run error error_code[" << ret << 
"]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc index 21718c5553..9256ada127 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling.cc @@ -72,7 +72,7 @@ int ROIPoolingCPUKernel::DoExecute(int task_id) { return RET_OK; } -int ROIPoolingRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ROIPoolingRun(void *cdata, int task_id) { auto Data = reinterpret_cast(cdata); auto ret = Data->DoExecute(task_id); if (ret != RET_OK) { @@ -91,7 +91,7 @@ int ROIPoolingCPUKernel::Run() { in_ptr_ = reinterpret_cast(in_tensors_.front()->Data()); out_ptr_ = reinterpret_cast(out_tensors_.front()->Data()); roi_ptr_ = reinterpret_cast(in_tensors_.at(1)->Data()); - ret = LiteBackendParallelLaunch(ROIPoolingRun, this, param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ROIPoolingRun, this, param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc index 7405331d12..aee1ff01ee 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc @@ -137,7 +137,7 @@ int ScaleCPUKernel::Scale(int task_id) { return RET_OK; } -int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ScaleRun(void *cdata, int task_id) { auto scale = reinterpret_cast(cdata); auto ret = scale->Scale(task_id); if (ret != RET_OK) { @@ -162,7 +162,7 @@ int ScaleCPUKernel::Run() { auto out_tensor = out_tensors_.front(); output_ptr_ = reinterpret_cast(out_tensor->Data()); - ret = LiteBackendParallelLaunch(ScaleRun, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ScaleRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << 
"Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc index 28b7a3816d..04917fdcf2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc @@ -137,7 +137,7 @@ int ScatterNDCPUKernel::ScatterND(int task_id) { return RET_OK; } -int ScatterNDRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ScatterNDRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->ScatterND(task_id); if (ret != RET_OK) { @@ -153,7 +153,7 @@ int ScatterNDCPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = LiteBackendParallelLaunch(ScatterNDRun, this, thread_n_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ScatterNDRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc index d81023a373..dccbd7a40a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc @@ -29,13 +29,13 @@ using mindspore::schema::PrimitiveType_Slice; namespace mindspore::kernel { namespace { -int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int SliceLaunch(void *cdata, int task_id) { if (cdata == nullptr) { MS_LOG(ERROR) << "Input cdata is nullptr!"; return RET_NULL_PTR; } auto kernel = reinterpret_cast(cdata); - return kernel->SliceParallelRun(thread_id); + return kernel->SliceParallelRun(task_id); } } // namespace @@ -97,7 +97,7 @@ int SliceCPUKernel::Run() { DoSliceNoParallel(input_data, output_data, param); return RET_OK; } - ret = LiteBackendParallelLaunch(SliceLaunch, this, param->op_parameter_.thread_num_); + ret = 
ParallelLaunch(THREAD_POOL_DEFAULT, SliceLaunch, this, param->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "slice launch fail!ret: " << ret; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc index 4e927e736b..e08f383894 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc @@ -74,7 +74,7 @@ int SpaceToDepthCPUKernel::SpaceToDepth(int task_id) { return RET_OK; } -int SpaceToDepthRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SpaceToDepthRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->SpaceToDepth(task_id); if (ret != RET_OK) { @@ -93,7 +93,7 @@ int SpaceToDepthCPUKernel::Run() { input_ptr_ = reinterpret_cast(in_tensors_[0]->Data()); output_ptr_ = reinterpret_cast(out_tensors_[0]->Data()); if (in_tensors_[0]->GetFormat() == schema::Format_NHWC) { - ret = LiteBackendParallelLaunch(SpaceToDepthRun, this, thread_h_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SpaceToDepthRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc index 16f89d1819..3e4d0b92ca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense.cc @@ -39,7 +39,7 @@ int SparseToDenseCPUKernel::DoExcute(int task_id) { return RET_OK; } -int SparseToDenseRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SparseToDenseRun(void *cdata, int task_id) { auto s2ddata = reinterpret_cast(cdata); auto ret = s2ddata->DoExcute(task_id); if (ret != RET_OK) { @@ -70,7 +70,7 @@ int SparseToDenseCPUKernel::Run() { std::vector temp_shape = 
output0->shape(); output_shape_ = reinterpret_cast(temp_shape.data()); - ret = LiteBackendParallelLaunch(SparseToDenseRun, this, s2d_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SparseToDenseRun, this, s2d_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc index 7fe0f68eae..cb56abae50 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/split.cc @@ -62,7 +62,7 @@ int SplitCPUKernel::Split(int task_id) { return RET_OK; } -int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SplitRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->Split(task_id); if (ret != RET_OK) { @@ -83,7 +83,7 @@ int SplitCPUKernel::Run() { for (int i = 0; i < param->num_split_; i++) { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->Data()); } - ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitRun, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc index 4985aa6d5a..283906e3d3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc @@ -72,7 +72,7 @@ int TransposeCPUKernel::TransposeParallel(int task_id) { return RET_OK; } -int TransposeRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int TransposeRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->TransposeParallel(task_id); if (ret != RET_OK) { @@ -101,7 +101,7 @@ int TransposeCPUKernel::Run() { in_shape_ = 
const_cast(in_tensor->shape().data()); out_shape_ = const_cast(out_tensor->shape().data()); - ret = LiteBackendParallelLaunch(TransposeRun, this, thread_h_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, TransposeRun, this, thread_h_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc index 57d545e24c..496c8e3f8a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze.cc @@ -55,7 +55,7 @@ int UnsqueezeCPUKernel::DoUnsqueeze(int task_id) { return RET_OK; } -int UnsqueezeRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int UnsqueezeRun(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoUnsqueeze(task_id); if (ret != RET_OK) { @@ -73,7 +73,7 @@ int UnsqueezeCPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->Data()); out_ptr_ = reinterpret_cast(out_tensors_.at(0)->Data()); - ret = LiteBackendParallelLaunch(UnsqueezeRun, this, thread_sz_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, UnsqueezeRun, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "UnsqueezeRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc index 4a853aa2c0..3a35179a20 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where.cc @@ -38,7 +38,7 @@ int WhereCPUKernel::DoExcute(int task_id) { return RET_OK; } -int WhereRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int WhereRun(void *cdata, int task_id) { auto wheredata = reinterpret_cast(cdata); auto ret = wheredata->DoExcute(task_id); if (ret != RET_OK) { @@ -79,7 +79,7 @@ int WhereCPUKernel::Run() { MS_LOG(ERROR) << "Error, inputs' 
length are zero !!!"; return RET_ERROR; } - ret = LiteBackendParallelLaunch(WhereRun, this, where_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, WhereRun, this, where_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc index a49dcc663b..6cc4999499 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc @@ -70,7 +70,7 @@ int ActivationGradCPUKernel::DoActivation(int task_id) { return RET_OK; } -int ActivationGradRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ActivationGradRun(void *cdata, int task_id) { auto activationGrad_kernel = reinterpret_cast(cdata); auto error_code = activationGrad_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -81,7 +81,7 @@ int ActivationGradRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { } int ActivationGradCPUKernel::Run() { - int error_code = LiteBackendParallelLaunch(ActivationGradRun, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ActivationGradRun, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc index 8b03aaa9e3..f7eea6ccd1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc @@ -102,17 +102,17 @@ int QuantizedAddCPUKernel::Run() { TileDimensionsUint8(static_cast(in_tensors_.at(0)->Data()), static_cast(in_tensors_.at(1)->Data()), reinterpret_cast(input0_data_), reinterpret_cast(input1_data_), &tile_para); - ret = 
LiteBackendParallelLaunch(AddInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddInt8Run, this, thread_count_); ctx_->allocator->Free(input0_data_); ctx_->allocator->Free(input1_data_); return ret; } - ret = LiteBackendParallelLaunch(AddInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, AddInt8Run, this, thread_count_); return ret; } -int AddInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int AddInt8Run(void *cdata, int task_id) { auto add = reinterpret_cast(cdata); add->DoExecute(task_id); return lite::RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h index be83375b0d..77d76fbc18 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h @@ -46,7 +46,7 @@ class QuantizedAddCPUKernel : public LiteKernel { int8_t *output_data_ = nullptr; }; -int AddInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int AddInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ADD_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc index c05aac9896..02fa869545 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -36,11 +36,11 @@ using mindspore::schema::PrimitiveType_NotEqual; namespace mindspore::kernel { namespace { -int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { +int ArithmeticsInt8Launch(void *cdata, int task_id) { auto arithmetic_kernel = reinterpret_cast(cdata); - auto error_code = arithmetic_kernel->DoArithmetic(thread_id); + auto error_code = arithmetic_kernel->DoArithmetic(task_id); if (error_code != RET_OK) { - MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << 
thread_id << "] error_code[" << error_code << "]"; + MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << task_id << "] error_code[" << error_code << "]"; return error_code; } return RET_OK; @@ -151,7 +151,7 @@ int ArithmeticInt8CPUKernel::Run() { } TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); } - ret = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticsInt8Launch, this, op_parameter_->thread_num_); if (param->broadcasting_) { context_->allocator->Free(tile_data0_); context_->allocator->Free(tile_data1_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc index 56b1a0fc2c..43c3a36123 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.cc @@ -65,7 +65,7 @@ int ArithmeticSelfInt8CPUKernel::ReSize() { return RET_OK; } -int ArithmeticSelfInt8Runs(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ArithmeticSelfInt8Runs(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoArithmeticSelf(task_id); if (ret != RET_OK) { @@ -104,7 +104,7 @@ int ArithmeticSelfInt8CPUKernel::Run() { auto out_tensor = out_tensors_.at(0); in_ptr_ = reinterpret_cast(input_tensor->Data()); out_ptr_ = reinterpret_cast(out_tensor->Data()); - ret = LiteBackendParallelLaunch(ArithmeticSelfInt8Runs, this, thread_sz_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ArithmeticSelfInt8Runs, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc index e702bcc27d..846f6f16b4 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc @@ -180,7 +180,7 @@ int BatchnormInt8CPUKernel::DoExecute(int task_id) { return RET_OK; } -int BatchNormInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int BatchNormInt8Run(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoExecute(task_id); if (ret != RET_OK) { @@ -199,7 +199,7 @@ int BatchnormInt8CPUKernel::Run() { in_addr_ = reinterpret_cast(in_tensors_.at(0)->Data()); out_addr_ = reinterpret_cast(out_tensors_.at(0)->Data()); - int ret = LiteBackendParallelLaunch(BatchNormInt8Run, this, batchnorm_param_->op_parameter_.thread_num_); + int ret = ParallelLaunch(THREAD_POOL_DEFAULT, BatchNormInt8Run, this, batchnorm_param_->op_parameter_.thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc index d60bde5f3e..8aad2d5716 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc @@ -104,12 +104,12 @@ int ConcatInt8CPUKernel::Run() { } output_data_ = reinterpret_cast(out_tensors_.at(0)->Data()); - ret = LiteBackendParallelLaunch(ConcatInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConcatInt8Run, this, thread_count_); return ret; } -int ConcatInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConcatInt8Run(void *cdata, int task_id) { auto concat = reinterpret_cast(cdata); concat->DoExecute(task_id); return lite::RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h index 7a677034db..0f8780fd2f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h @@ -56,7 +56,7 @@ class ConcatInt8CPUKernel : public ConcatBaseCPUKernel { int8_t *output_data_ = nullptr; }; -int ConcatInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int ConcatInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONCAT_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc index 9e8c7968be..a64d94d1f8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc @@ -217,7 +217,7 @@ int Convolution1x1Int8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int Convolution1x1Int8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int Convolution1x1Int8Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -253,7 +253,7 @@ int Convolution1x1Int8CPUKernel::Run() { PackInputSum16x4Int8(packed_input_, input_sum_, matmul_param_->deep_, matmul_param_->col_, matmul_param_->row_, conv_param_); - int error_code = LiteBackendParallelLaunch(Convolution1x1Int8Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution1x1Int8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv1x1 fp16 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc index 5dbddbaba2..56e7557c4b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc @@ -213,7 +213,7 @@ int Convolution3x3Int8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int 
Convolution3x3Int8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int Convolution3x3Int8Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -238,7 +238,7 @@ int Convolution3x3Int8CPUKernel::Run() { auto input_addr = reinterpret_cast(in_tensors_.at(kInputIndex)->Data()); PackInputToC8Int8(input_addr, input_data_, conv_param_); - int error_code = LiteBackendParallelLaunch(Convolution3x3Int8Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, Convolution3x3Int8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv3x3 int8 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index 4cf2b00ec8..3b8bfa935a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -128,7 +128,7 @@ int ConvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { return RET_OK; } -int ConvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvDwInt8Run(void *cdata, int task_id) { auto conv_dw_int8 = reinterpret_cast(cdata); auto ret = conv_dw_int8->Execute(task_id); if (ret != RET_OK) { @@ -164,7 +164,7 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { packed_output_ = output_addr; } - ret = LiteBackendParallelLaunch(ConvDwInt8Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ConvDwInt8Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc index 28dfc57340..e3f6703741 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc @@ -338,7 +338,7 @@ int ConvolutionInt8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int ConvolutionInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ConvolutionInt8Impl(void *cdata, int task_id) { auto conv = reinterpret_cast(cdata); auto error_code = conv->RunImpl(task_id); if (error_code != RET_OK) { @@ -374,7 +374,7 @@ int ConvolutionInt8CPUKernel::Run() { convert_func_(ori_input_data, nhwc4_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); - int error_code = LiteBackendParallelLaunch(ConvolutionInt8Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ConvolutionInt8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv int8 error error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc index 0feb6e6614..afc1c6545d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc @@ -67,7 +67,7 @@ int CropInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = LiteBackendParallelLaunch(CropInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, CropInt8Run, this, thread_count_); return ret; } @@ -91,7 +91,7 @@ void PadOffset(int input_dim, CropParameter *crop_para) { } } -int CropInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int CropInt8Run(void *cdata, int task_id) { auto crop = reinterpret_cast(cdata); crop->DoExecute(task_id); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h index 46aabf4354..3cbcaba8eb 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h @@ -46,7 +46,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel { CropParameter *crop_para_; }; -int CropInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int CropInt8Run(void *cdata, int task_id); void PadOffset(int input_dim, CropParameter *crop_para); } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc index 74916a7586..ba4dca80fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc @@ -164,7 +164,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { return RET_OK; } -int DeconvDwInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int DeconvDwInt8Run(void *cdata, int task_id) { auto deconv_dw_int8 = reinterpret_cast(cdata); auto ret = deconv_dw_int8->Execute(task_id); if (ret != RET_OK) { @@ -196,7 +196,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { packed_output_ = output_addr; } - ret = LiteBackendParallelLaunch(DeconvDwInt8Run, this, conv_param_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, DeconvDwInt8Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc index 55e8f60d50..8f4b06d55d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc @@ -216,7 +216,7 @@ void DeConvInt8CPUKernel::FreeRunBuf() { return; } -int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int 
DeConvInt8Run(void *cdata, int task_id) { auto deconv = reinterpret_cast(cdata); auto error_code = deconv->DoDeconv(task_id); if (error_code != RET_OK) { @@ -272,7 +272,7 @@ int DeConvInt8CPUKernel::Run() { DeConvPackInputSum(input_ptr_, input_sum_, conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->row_, C4NUM), UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_); - error_code = LiteBackendParallelLaunch(DeConvInt8Run, this, thread_count_); + error_code = ParallelLaunch(THREAD_POOL_DEFAULT, DeConvInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc index 3550bace2f..0249cccdf4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc @@ -87,7 +87,7 @@ int DivInt8CPUKernel::DoExecute(int task_id) { return ret; } -int DivInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int DivInt8Run(void *cdata, int task_id) { auto div_kernel = reinterpret_cast(cdata); auto ret = div_kernel->DoExecute(task_id); if (ret != RET_OK) { @@ -123,7 +123,7 @@ int DivInt8CPUKernel::Run() { static_cast(in_tensors_.at(1)->Data()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } - ret = LiteBackendParallelLaunch(DivInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, DivInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { context_->allocator->Free(tile0_data_); context_->allocator->Free(tile1_data_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc index 54284d72d4..48e4ffec66 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc @@ -118,7 +118,7 @@ int FullconnectionInt8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int FcInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int FcInt8Run(void *cdata, int task_id) { auto fc = reinterpret_cast(cdata); auto ret = fc->RunImpl(task_id); if (ret != RET_OK) { @@ -137,7 +137,7 @@ int FullconnectionInt8CPUKernel::Run() { auto input_ptr = reinterpret_cast(in_tensors_[0]->Data()); RowMajor2Row4x16Major(input_ptr, fc_param_->row_, fc_param_->deep_, a_r4x16_ptr_, d16_); CalcInputSums(input_ptr, fc_param_->row_, fc_param_->deep_, quant_params_.weight.zp_, input_sums_); - LiteBackendParallelLaunch(FcInt8Run, this, thread_count_); + ParallelLaunch(THREAD_POOL_DEFAULT, FcInt8Run, this, thread_count_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc index 4a90d375d3..f5539b9195 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc @@ -114,7 +114,7 @@ int GatherNdInt8CPUKernel::DoGatherNd(int task_id) { return RET_OK; } -int GatherNdInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int GatherNdInt8Run(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoGatherNd(task_id); if (ret != RET_OK) { @@ -132,7 +132,7 @@ int GatherNdInt8CPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.front()->Data()); out_ptr_ = reinterpret_cast(out_tensors_.front()->Data()); - auto ret = LiteBackendParallelLaunch(GatherNdInt8Run, this, thread_sz_count_); + auto ret = ParallelLaunch(THREAD_POOL_DEFAULT, GatherNdInt8Run, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc 
b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc index da2e3e6ac5..749123770a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc @@ -111,7 +111,7 @@ int GatherInt8CPUKernel::DoGather(int task_id) { return RET_OK; } -int GatherInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int GatherInt8Run(void *cdata, int task_id) { auto gather_kernel = reinterpret_cast(cdata); auto error_code = gather_kernel->DoGather(task_id); if (error_code != RET_OK) { @@ -127,7 +127,7 @@ int GatherInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; return prepare_ret; } - int error_code = LiteBackendParallelLaunch(GatherInt8Run, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, GatherInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc index 686e14cf61..8ece51bc81 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.cc @@ -78,7 +78,7 @@ int HswishInt8CPUKernel::DoActivation(int task_id) { return RET_OK; } -int HswishInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int HswishInt8Run(void *cdata, int task_id) { auto activation_kernel = reinterpret_cast(cdata); auto error_code = activation_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -94,7 +94,7 @@ int HswishInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = LiteBackendParallelLaunch(HswishInt8Run, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, HswishInt8Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "HswishInt8Run function error error_code[" << 
error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc index 9bc770b1cb..2593794921 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.cc @@ -82,13 +82,13 @@ int LeakyReluInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - ret = LiteBackendParallelLaunch(PreluInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, PreluInt8Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "RunPreluParam failed. errorcode: "; } return RET_OK; } -int PreluInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PreluInt8Run(void *cdata, int task_id) { auto prelu = reinterpret_cast(cdata); prelu->DoExecute(task_id); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h index 9df765079a..c995f7313d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h @@ -41,7 +41,7 @@ class LeakyReluInt8CPUKernel : public LeakyReluBaseCPUKernel { private: LeakyReluQuantArg quant_prelu_parm_; }; -int PreluInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int PreluInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_PRELU_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc index 935f74d454..aa93c9d4c4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_int8.cc @@ -111,7 +111,7 @@ int MatmulInt8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int MatmulInt8Run(int 
task_id, LiteParallelGroupEnv *penv, void *cdata) { +int MatmulInt8Run(void *cdata, int task_id) { auto op = reinterpret_cast(cdata); auto ret = op->RunImpl(task_id); if (ret != RET_OK) { @@ -152,7 +152,7 @@ int MatmulInt8CPUKernel::Run() { auto &q = quant_params_; CalcInputSums(cur_a_ptr, params_->row_, params_->deep_, q.weight.zp_, input_sums_); CalcWeightBiasSums(cur_b_ptr, params_->deep_, params_->col_, q.input.zp_, q.weight.zp_, NULL, weight_bias_sums_); - ret = LiteBackendParallelLaunch(MatmulInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, MatmulInt8Run, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "MatmulInt8Run error: [" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index 42ddfe3d6f..d4cad12b42 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -86,17 +86,17 @@ int MulInt8CPUKernel::Run() { } TileDimensionsInt8(static_cast(in_tensors_.at(0)->Data()), static_cast(in_tensors_.at(1)->Data()), input0_data_, input1_data_, &tile_para); - ret = LiteBackendParallelLaunch(MulInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, MulInt8Run, this, thread_count_); ctx_->allocator->Free(input0_data_); ctx_->allocator->Free(input1_data_); return ret; } - ret = LiteBackendParallelLaunch(MulInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, MulInt8Run, this, thread_count_); return ret; } -int MulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int MulInt8Run(void *cdata, int task_id) { auto mul = reinterpret_cast(cdata); mul->DoExecute(task_id); return lite::RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 36d9984cac..9f00e2e8e1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -46,7 +46,7 @@ class MulInt8CPUKernel : public LiteKernel { int8_t *output_data_ = nullptr; }; -int MulInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int MulInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MUL_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc index 0f67fa9d9d..f836cfa22a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc @@ -112,7 +112,7 @@ int PadInt8CPUKernel::RunImpl(int task_id) { return PadConstant4D(in_data_, out_data_, in_dims_, out_dims_, pad_param_->paddings_, task_id, context_->thread_num_); } -int PadInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PadInt8Impl(void *cdata, int task_id) { auto resize = reinterpret_cast(cdata); auto error_code = resize->RunImpl(task_id); if (error_code != RET_OK) { @@ -132,7 +132,7 @@ int PadInt8CPUKernel::Run() { out_data_ = reinterpret_cast(out_tensors_[0]->Data()); memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t)); - int error_code = LiteBackendParallelLaunch(PadInt8Impl, this, context_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PadInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc index f9e990bab4..72749cddb3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc @@ -69,7 +69,7 @@ int PoolingInt8CPUKernel::RunImpl(int task_id) { return RET_OK; } -int PoolingInt8Impl(int 
task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PoolingInt8Impl(void *cdata, int task_id) { auto pooling = reinterpret_cast(cdata); auto error_code = pooling->RunImpl(task_id); if (error_code != RET_OK) { @@ -85,7 +85,7 @@ int PoolingInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = LiteBackendParallelLaunch(PoolingInt8Impl, this, thread_count_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, PoolingInt8Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "poolingInt8 error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc index 9e7aa10d62..5abe82c9b9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/power_int8.cc @@ -88,7 +88,7 @@ int PowerInt8CPUKernel::DoPower(int task_id) { return ret; } -int PowerInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int PowerInt8Run(void *cdata, int task_id) { auto power_kernel = reinterpret_cast(cdata); auto ret = power_kernel->DoPower(task_id); if (ret != RET_OK) { @@ -103,7 +103,7 @@ int PowerInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return ret; } - ret = LiteBackendParallelLaunch(PowerInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, PowerInt8Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "PowerInt8Run error, error_code[" << ret << "]"; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc index 90a2e5aad8..2e498bbcc8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.cc @@ -235,7 +235,7 @@ int ReduceInt8CPUKernel::ReSize() { return ret; } -int ReduceInt8Impl(int task_id, 
LiteParallelGroupEnv *penv, void *cdata) { +int ReduceInt8Impl(void *cdata, int task_id) { auto reduce = reinterpret_cast(cdata); auto error_code = reduce->CallReduceUnit(task_id); if (error_code != RET_OK) { @@ -284,7 +284,7 @@ int ReduceInt8CPUKernel::Run() { inner_size_ *= tmp_shape_[k]; } axis_size_ = tmp_shape_[axis]; - auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); + auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { FreeTmpBuffer(); MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; @@ -321,7 +321,7 @@ int ReduceInt8CPUKernel::Run() { axis_size_ = tmp_shape_[last_reduce_axis]; last_dst_data_ = reinterpret_cast(out_tensors_.at(0)->Data()); is_last_axis_ = true; - auto error_code = LiteBackendParallelLaunch(ReduceInt8Impl, this, context_->thread_num_); + auto error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ReduceInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc index 8292653610..8ec6f39d58 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.cc @@ -58,7 +58,7 @@ int ReluXInt8CPUKernel::DoActivation(int task_id) { return RET_OK; } -int ReluXInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ReluXInt8Run(void *cdata, int task_id) { auto activation_kernel = reinterpret_cast(cdata); auto error_code = activation_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -74,7 +74,7 @@ int ReluXInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - int error_code = LiteBackendParallelLaunch(ReluXInt8Run, this, op_parameter_->thread_num_); + int error_code = 
ParallelLaunch(THREAD_POOL_DEFAULT, ReluXInt8Run, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "ReluXInt8Run function error error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc index 02149c3847..a730a61c48 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc @@ -60,11 +60,11 @@ int ReshapeInt8CPUKernel::Run() { elements_num_ = in_tensors_.at(kInputIndex)->ElementsNum(); count_unit_ = op_parameter_->thread_num_ > 1 ? UP_DIV(elements_num_, op_parameter_->thread_num_) : elements_num_; - ret = LiteBackendParallelLaunch(ReshapeInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, ReshapeInt8Run, this, op_parameter_->thread_num_); return ret; } -int ReshapeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ReshapeInt8Run(void *cdata, int task_id) { auto reshape = reinterpret_cast(cdata); reshape->DoExecute(task_id); return lite::RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h index 13f2450342..61115acdd8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h @@ -46,7 +46,7 @@ class ReshapeInt8CPUKernel : public ReshapeBaseCPUKernel { int8_t *output_data_ = nullptr; }; -int ReshapeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int ReshapeInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_RESHAPE_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc index 7af0c3f853..aab798265a 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc @@ -56,7 +56,7 @@ int ResizeInt8CPUKernel::Init() { return ReSize(); } -int ResizeInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int ResizeInt8Impl(void *cdata, int task_id) { auto resize = reinterpret_cast(cdata); auto error_code = resize->RunImpl(task_id); if (error_code != RET_OK) { @@ -124,7 +124,7 @@ int ResizeInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare failed."; return RET_ERROR; } - int error_code = LiteBackendParallelLaunch(ResizeInt8Impl, this, context_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, ResizeInt8Impl, this, context_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc index 750fdcea9f..2add85dd2e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.cc @@ -79,7 +79,7 @@ int SigmoidInt8CPUKernel::DoActivation(int task_id) { return RET_OK; } -int SigmoidInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SigmoidInt8Run(void *cdata, int task_id) { auto activation_kernel = reinterpret_cast(cdata); auto error_code = activation_kernel->DoActivation(task_id); if (error_code != RET_OK) { @@ -95,7 +95,7 @@ int SigmoidInt8CPUKernel::Run() { MS_LOG(ERROR) << "Prepare fail!ret: " << ret; return ret; } - int error_code = LiteBackendParallelLaunch(SigmoidInt8Run, this, op_parameter_->thread_num_); + int error_code = ParallelLaunch(THREAD_POOL_DEFAULT, SigmoidInt8Run, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "SigmoidInt8Run function error error_code[" << error_code << "]"; return RET_ERROR; diff --git 
a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc index 1ba2df8f25..631273ec16 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.cc @@ -68,7 +68,7 @@ int SliceInt8CPUKernel::DoSlice(int task_id) { return ret; } -int SliceInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SliceInt8Run(void *cdata, int task_id) { auto slice_kernel = reinterpret_cast(cdata); auto ret = slice_kernel->DoSlice(task_id); if (ret != RET_OK) { @@ -90,7 +90,7 @@ int SliceInt8CPUKernel::Run() { if (param_->size_[1] < param_->op_parameter_.thread_num_) { ret = SliceInt8NoParallel(input_data, output_data, param_); } else { - ret = LiteBackendParallelLaunch(SliceInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SliceInt8Run, this, op_parameter_->thread_num_); } if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc index 20f52429e7..1b16a00336 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc @@ -94,7 +94,7 @@ int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) { return RET_OK; } -int SoftmaxRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SoftmaxRun(void *cdata, int task_id) { auto softmax_kernel = reinterpret_cast(cdata); auto error_code = softmax_kernel->DoSoftmax(task_id); if (error_code != RET_OK) { @@ -122,7 +122,7 @@ int SoftmaxInt8CPUKernel::Run() { context_->allocator->Free(sum_data_); return RET_ERROR; } - ret = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SoftmaxRun, this, thread_count_); context_->allocator->Free(exp_data_); context_->allocator->Free(sum_data_); if (ret != RET_OK) { diff --git 
a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc index b69891f45d..676da57151 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc @@ -71,7 +71,7 @@ int SplitInt8CPUKernel::Split(int task_id) { return RET_OK; } -int SplitInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SplitInt8Run(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->Split(task_id); if (ret != RET_OK) { @@ -94,7 +94,7 @@ int SplitInt8CPUKernel::Run() { output_ptr_.push_back(reinterpret_cast(out_tensors_.at(i)->Data())); } - ret = LiteBackendParallelLaunch(SplitInt8Run, this, thread_n_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SplitInt8Run, this, thread_n_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc index 311a3e41f4..c1e4196b6e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc @@ -157,7 +157,7 @@ int SqueezeInt8CPUKernel::Run() { free(*(inputs_array + i)); } - ret = LiteBackendParallelLaunch(SqueezeInt8Run, this, thread_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SqueezeInt8Run, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "RunSqueezeParam failed. 
errorcode: "; } @@ -165,7 +165,7 @@ int SqueezeInt8CPUKernel::Run() { return ret; } -int SqueezeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SqueezeInt8Run(void *cdata, int task_id) { auto Squeeze = reinterpret_cast(cdata); Squeeze->DoExecute(task_id); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h index 128e32425e..6d205ce62b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h @@ -44,7 +44,7 @@ class SqueezeInt8CPUKernel : public SqueezeBaseCPUKernel { SqueezeQuantArg *quant_Squeeze_parm_; }; -int SqueezeInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata); +int SqueezeInt8Run(void *cdata, int task_id); } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc index 9ffca499ac..9a4f705072 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc @@ -110,7 +110,7 @@ int SubInt8CPUKernel::DoExecute(int task_id) { return RET_OK; } -int SubInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int SubInt8Run(void *cdata, int task_id) { auto sub_kernel = reinterpret_cast(cdata); auto ret = sub_kernel->DoExecute(task_id); if (ret != RET_OK) { @@ -147,7 +147,7 @@ int SubInt8CPUKernel::Run() { static_cast(in_tensors_.at(1)->Data()), reinterpret_cast(tile0_data_), reinterpret_cast(tile1_data_), &tile_para); } - ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, SubInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { context_->allocator->Free(tile0_data_); context_->allocator->Free(tile1_data_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc 
b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc index 35c7200b56..98f3d9067a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc @@ -70,7 +70,7 @@ int Unsqueezeint8CPUKernel::DoUnsqueeze(int task_id) { return RET_OK; } -int UnsqueezeIn8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { +int UnsqueezeIn8Run(void *cdata, int task_id) { auto g_kernel = reinterpret_cast(cdata); auto ret = g_kernel->DoUnsqueeze(task_id); if (ret != RET_OK) { @@ -88,7 +88,7 @@ int Unsqueezeint8CPUKernel::Run() { } in_ptr_ = reinterpret_cast(in_tensors_.at(0)->Data()); out_ptr_ = reinterpret_cast(out_tensors_.at(0)->Data()); - ret = LiteBackendParallelLaunch(UnsqueezeIn8Run, this, thread_sz_count_); + ret = ParallelLaunch(THREAD_POOL_DEFAULT, UnsqueezeIn8Run, this, thread_sz_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "UnsqueezeRun error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/parallel_executor.cc b/mindspore/lite/src/runtime/parallel_executor.cc index 5d4c983cea..0d14f7b41d 100644 --- a/mindspore/lite/src/runtime/parallel_executor.cc +++ b/mindspore/lite/src/runtime/parallel_executor.cc @@ -16,26 +16,22 @@ #include #include "src/runtime/parallel_executor.h" -using mindspore::predict::ThreadPool; -using mindspore::predict::TvmEnv; +#include "include/thread_pool_config.h" +#include "src/runtime/runtime_api.h" + #define MAX_THREAD_NUM 8 namespace mindspore::lite { -ParallelExecutor::~ParallelExecutor() { - delete pool; - pool = nullptr; -} +ParallelExecutor::~ParallelExecutor() {} int ParallelExecutor::Prepare(std::vector &kernels) { - pool = new ThreadPool(); - if (pool == nullptr) { + int status = ConfigThreadPool(THREAD_POOL_DEFAULT, MAX_THREAD_NUM, NO_BIND); + if (status != 0) { MS_LOG(ERROR) << "Memory error: fail to new ThreadPool"; return RET_ERROR; } - pool->ConfigMaxThreadNum(MAX_THREAD_NUM); - pool->ConfigThreadPool(NO_BIND, 
MAX_THREAD_NUM); return RET_OK; } -static int RunKernel(int index, TvmEnv *env, void *data) { +static int RunKernel(void *data, int index) { ParallelExecutor *executor = reinterpret_cast(data); auto kernel = executor->GetReadyKernel(index); auto ret = kernel->Run(); @@ -84,7 +80,7 @@ int ParallelExecutor::Run(std::vector &in_tensors, std::vector std::vector newReadyKernels; while (readyKernels.size() > 0) { results.resize(readyKernels.size(), RET_OK); - pool->LaunchWork(RunKernel, this, readyKernels.size()); + ParallelLaunch(THREAD_POOL_DEFAULT, RunKernel, this, readyKernels.size()); if (std::find_if(results.begin(), results.end(), [](const int &ret) { return (ret != 0); }) != results.end()) { return RET_ERROR; diff --git a/mindspore/lite/src/runtime/parallel_executor.h b/mindspore/lite/src/runtime/parallel_executor.h index 492d599110..95dfbbd58f 100644 --- a/mindspore/lite/src/runtime/parallel_executor.h +++ b/mindspore/lite/src/runtime/parallel_executor.h @@ -23,7 +23,6 @@ #include "src/lite_kernel.h" #include "include/lite_session.h" #include "src/executor.h" -#include "src/runtime/thread_pool.h" namespace mindspore::lite { class ParallelExecutor : public Executor { @@ -40,7 +39,6 @@ class ParallelExecutor : public Executor { inline void SetResult(const int index, const int result) { results.at(index) = result; } private: - predict::ThreadPool *pool; std::unordered_map refCount; std::vector readyKernels; std::vector results; diff --git a/mindspore/lite/src/runtime/runtime_api.cc b/mindspore/lite/src/runtime/runtime_api.cc index fa7170404f..374796cdaf 100644 --- a/mindspore/lite/src/runtime/runtime_api.cc +++ b/mindspore/lite/src/runtime/runtime_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,26 +14,19 @@ * limitations under the License. 
*/ +#include "src/runtime/runtime_api.h" #include #include -#include "src/runtime/runtime_api.h" #include "src/runtime/workspace_pool.h" -#include "src/runtime/thread_pool.h" #include "utils/log_adapter.h" static std::mutex gWorkspaceMutex; #ifdef __cplusplus extern "C" { #endif -void LiteAPISetLastError(const char *msg) { - MS_LOG(ERROR) << "The lite api set last error is " << msg; -} +void LiteAPISetLastError(const char *msg) { MS_LOG(ERROR) << "The lite api set last error is " << msg; } -void *LiteBackendAllocWorkspace(int deviceType, - int deviceId, - uint64_t size, - int dtypeCode, - int dtypeBits) { +void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits) { std::lock_guard lock(gWorkspaceMutex); auto p = mindspore::predict::WorkspacePool::GetInstance(); if (p == nullptr) { @@ -52,54 +45,6 @@ int LiteBackendFreeWorkspace(int deviceType, int deviceId, void *ptr) { p->FreeWorkSpaceMem(ptr); return 0; } - -void SetMaxWokerNum(int num) { - auto p = mindspore::predict::GlobalThreadPool(); - if (p == nullptr) { - MS_LOG(ERROR) << "Get thread pool instance failed"; - return; - } - if (num < 0) { - LiteAPISetLastError("The number of work thread is less than 0"); - return; - } - p->ConfigMaxThreadNum(num); -} - -void ConfigThreadPool(int mode, int nthreads) { - auto p = mindspore::predict::GlobalThreadPool(); - if (p == nullptr) { - MS_LOG(ERROR) << "Get thread pool instance failed"; - return; - } - p->ConfigThreadPool(mode, nthreads); -} - -int LiteBackendParallelLaunch(FTVMParallelLambda flambda, void *cdata, int num_task) { - auto p = mindspore::predict::GlobalThreadPool(); - if (p == nullptr) { - MS_LOG(ERROR) << "Get thread pool instance failed"; - return -1; - } - if (!p->LaunchWork(flambda, cdata, num_task)) { - MS_LOG(ERROR) << "launch thread pool work failed"; - return -1; - } - return 0; -} - -void DoAllThreadBind(bool ifBind, int mode) { - auto p = mindspore::predict::GlobalThreadPool(); - if (p == nullptr) 
{ - MS_LOG(ERROR) << "Get thread pool instance failed"; - return; - } - if (!p->BindAllThreads(ifBind, mode)) { - MS_LOG(ERROR) << "do thread cpu bind failed"; - } -} - #ifdef __cplusplus } #endif - diff --git a/mindspore/lite/src/runtime/runtime_api.h b/mindspore/lite/src/runtime/runtime_api.h index cd3942d79e..bd6d23380d 100644 --- a/mindspore/lite/src/runtime/runtime_api.h +++ b/mindspore/lite/src/runtime/runtime_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_API_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_API_H_ +#ifndef PREDICT_SRC_RUNTIME_RUNTIME_API_H_ +#define PREDICT_SRC_RUNTIME_RUNTIME_API_H_ #include +#include "include/thread_pool_config.h" #ifndef INTERNAL_API_DLL #ifdef _WIN32 @@ -32,26 +32,16 @@ #ifdef __cplusplus extern "C" { +#include "src/runtime/thread_pool.h" + #endif -typedef struct { - void *sync_handle; - int32_t num_task; -} LiteParallelGroupEnv; -typedef int (*FTVMParallelLambda)(int task_id, LiteParallelGroupEnv *penv, void *cdata); INTERNAL_API_DLL void LiteAPISetLastError(const char *msg); INTERNAL_API_DLL void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits); INTERNAL_API_DLL int LiteBackendFreeWorkspace(int deviceType, int deviceId, void *ptr); -INTERNAL_API_DLL void SetMaxWokerNum(int num); -INTERNAL_API_DLL void ConfigThreadPool(int mode, int nthreads); -INTERNAL_API_DLL inline void CfgThreadPool(int nthread) { ConfigThreadPool(-1, nthread); } -INTERNAL_API_DLL int LiteBackendParallelLaunch(FTVMParallelLambda flambda, void *cdata, int num_task); INTERNAL_API_DLL int 
LiteBackendRegisterSystemLibSymbol(const char *name, void *ptr); -INTERNAL_API_DLL void DoAllThreadBind(bool ifBind, int mode); - #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_API_H_ - +#endif // PREDICT_SRC_RUNTIME_RUNTIME_API_H_ diff --git a/mindspore/lite/src/runtime/thread_pool.c b/mindspore/lite/src/runtime/thread_pool.c new file mode 100644 index 0000000000..42a85cef0b --- /dev/null +++ b/mindspore/lite/src/runtime/thread_pool.c @@ -0,0 +1,797 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/thread_pool.h" +#include "include/thread_pool_config.h" +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#ifdef __ANDROID__ +#define BIND_CORE +#include +#include +#endif + +#ifdef THREAD_POOL_DEBUG +#include +#define LOG_INFO(content, args...) \ + { printf("[INFO] %s|%d|%s: " #content "\r\n", __FILE__, __LINE__, __func__, ##args); } +#else +#define LOG_INFO(content, args...) 
+#endif + +#define RET_TP_OK (0) +#define RET_TP_ERROR (1) +#define RET_TP_SYSTEM_ERROR (-1) + +#define MAX_TASK_NUM (2) +#define MAX_THREAD_NUM (8) +#define MAX_THREAD_POOL_NUM (4) +#define DEFAULT_SPIN_COUNT (30000) + +typedef struct { + int (*func)(void *arg, int); + void *content; +} Task; + +typedef struct Thread { + int thread_pool_id; + int thread_id; + struct Thread *next; + pthread_t pthread; + Task *task_list[MAX_TASK_NUM]; + atomic_int task_size; + atomic_int head; + atomic_int tail; + atomic_bool activate; + atomic_bool is_running; + sem_t sem; +} Thread; + +typedef struct { + Thread *head; + Thread *tail; + pthread_mutex_t lock; + int size; +} ThreadList; + +typedef struct ThreadPool { + ThreadList *thread_list; + int thread_num; + CpuBindMode mode; + atomic_bool is_alive; +} ThreadPool; + +static ThreadPool thread_pool_list[MAX_THREAD_POOL_NUM]; +static atomic_int thread_pool_refcount[MAX_THREAD_POOL_NUM] = {ATOMIC_VAR_INIT(0)}; +static atomic_bool thread_pool_is_created[MAX_THREAD_POOL_NUM] = {ATOMIC_VAR_INIT(false)}; + +ThreadPool *GetInstance(int thread_pool_id) { + if (thread_pool_id < 0 || thread_pool_id >= MAX_THREAD_POOL_NUM) { + LOG_INFO("invaid context id: %d", thread_pool_id); + // DestroyThreadPool(thread_pool_id); + return NULL; + } + return &thread_pool_list[thread_pool_id]; +} + +Thread *GetThread(int thread_pool_id, int thread_id) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, thread_id); + return NULL; + } + ThreadList *thread_list = thread_pool->thread_list; + if (thread_list == NULL) { + LOG_INFO("thead list is null"); + return NULL; + } + if (thread_id >= thread_list->size) { + LOG_INFO("invalid thread id: %d, thread_pool_id: %d, thread size: %d", thread_id, thread_pool_id, + thread_list->size); + return NULL; + } + if (thread_id == 0) { + return thread_list->head; + } + Thread *thread = 
thread_list->head; + while (thread != NULL) { + if (thread->thread_id == thread_id) { + break; + } + thread = thread->next; + } + return thread; +} + +void FreeThread(ThreadList *thread_list, Thread *thread) { + if (thread_list == NULL) { + LOG_INFO("thead list is null"); + return; + } + if (thread == NULL) { + LOG_INFO("thread is nullptr"); + return; + } + // only support sequential release + thread_list->head = thread->next; + sem_post(&thread->sem); + while (thread != NULL && !thread->is_running) { + sem_destroy(&thread->sem); + free(thread); + thread = NULL; + } +} + +#ifdef BIND_CORE +#define MAX_CORE_NUM (16) +static int gCoreNum = 8; +static int gHigNum = 0; +static int gMidNum = 0; +static int cpu_cores[MAX_CORE_NUM]; +static bool run_once = true; + +#define MAX_CPU_ID (9) +#define MAX_PATH_SIZE (256) +typedef struct { + int core_id; + int max_freq; +} CpuInfo; + +int GetCpuCoreNum() { return (int)sysconf(_SC_NPROCESSORS_CONF); } + +static int ConcatCPUPath(int cpuID, const char *str1, const char *str2, char *str3) { + if (cpuID > MAX_CPU_ID || str1 == NULL || str2 == NULL) { + return RET_TP_ERROR; + } + memset(str3, 0, strlen(str3)); + char *tmp = str3; + char id = cpuID + '0'; + memcpy(tmp, str1, strlen(str1)); + tmp += strlen(str1); + memcpy(tmp, &id, 1); + tmp += 1; + memcpy(tmp, str2, strlen(str2)); + return RET_TP_OK; +} + +int GetMaxFrequence(int core_id) { + char path[MAX_PATH_SIZE] = ""; + int ret = ConcatCPUPath(core_id, "/sys/devices/system/cpu/cpufreq/stats/cpu", "/time_in_state", path); + if (ret != RET_TP_OK) { + LOG_INFO("parse cpuid from /sys/devices/system/cpu/cpufreq/stats/cpu/time_in_state failed!"); + return RET_TP_ERROR; + } + FILE *fp = fopen(path, "rb"); + if (fp == NULL) { + ret = ConcatCPUPath(core_id, "/sys/devices/system/cpu/cpufreq/stats/cpu", "/cpufreq/stats/time_in_state", path); + if (ret != RET_TP_OK) { + LOG_INFO("parse cpuid from /sys/devices/system/cpu/cpufreq/stats/cpu/cpufreq/stats/time_instate failed!"); + return 
RET_TP_ERROR; + } + fp = fopen(path, "rb"); + if (fp == NULL) { + ret = ConcatCPUPath(core_id, "/sys/devices/system/cpu/cpu", "/cpufreq/cpuinfo_max_freq", path); + if (ret != RET_TP_OK) { + LOG_INFO("parse cpuid from /sys/devices/system/cpu/cpufreq/cpuinfo_max_freq failed!"); + return RET_TP_ERROR; + } + fp = fopen(path, "rb"); + if (fp == NULL) { + LOG_INFO("GetCPUMaxFreq failed, cannot find cpuinfo_max_freq."); + return RET_TP_ERROR; + } + int maxFreq = -1; + int result __attribute__((unused)); + result = fscanf(fp, "%d", &maxFreq); + fclose(fp); + return maxFreq; + } + } + int maxFreq = -1; + while (feof(fp) == 0) { + int freq = 0; + int tmp = fscanf(fp, "%d", &freq); + if (tmp != 1) { + break; + } + if (freq > maxFreq) { + maxFreq = freq; + } + } + fclose(fp); + return maxFreq; +} + +int SortCpuProcessor() { + gCoreNum = GetCpuCoreNum(); + if (gCoreNum <= 0) { + LOG_INFO("invalid cpu count"); + return RET_TP_ERROR; + } + CpuInfo freq_set[gCoreNum]; + for (int i = 0; i < gCoreNum; ++i) { + int max_freq = GetMaxFrequence(i); + freq_set[i].core_id = i; + freq_set[i].max_freq = max_freq; + } + // sort core id by frequency + for (int i = 0; i < gCoreNum; ++i) { + for (int j = i + 1; j < gCoreNum; ++j) { + if (freq_set[i].max_freq <= freq_set[j].max_freq) { + CpuInfo temp = freq_set[i]; + freq_set[i] = freq_set[j]; + freq_set[j] = temp; + } + } + } + for (int i = 0; i < gCoreNum; ++i) { + cpu_cores[i] = freq_set[i].core_id; + LOG_INFO("sorted_order: %d, frequency: %d", freq_set[i].core_id, freq_set[i].max_freq); + } + gHigNum = 0; + gMidNum = 0; + int max_freq = freq_set[0].max_freq; + int min_freq = freq_set[gCoreNum - 1].max_freq; + int little = 0; + for (int i = 0; i < gCoreNum; ++i) { + if (freq_set[i].max_freq == max_freq) { + gHigNum++; + } + if (freq_set[i].max_freq == min_freq) { + little++; + } + } + gMidNum = gCoreNum - gHigNum - little; + if (gHigNum == gCoreNum || max_freq == min_freq) { + // fix MTK800 + gHigNum = 2; + gMidNum = 2; + LOG_INFO("core 
frequency may be wrong."); + } + LOG_INFO("gCoreNum: %d, gHigNum: %d, gMidNum: %d, gLitNum: %d", gCoreNum, gHigNum, gMidNum, little); + return RET_TP_OK; +} + +#ifndef CPU_SET +#define CPU_SETSIZE 1024 +#define __NCPUBITS (8 * sizeof(unsigned long)) +typedef struct { + unsigned long __bits[CPU_SETSIZE / __NCPUBITS]; +} cpu_set_t; +#define CPU_SET(cpu, cpusetp) ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS))) +#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t)) +#endif // CPU_SET + +int SetAffinity(pthread_t thread_id, cpu_set_t *cpuSet) { +#ifdef __ANDROID__ +#if __ANDROID_API__ >= 21 + LOG_INFO("thread: %d, mask: %lu", pthread_gettid_np(thread_id), cpuSet->__bits[0]); + int ret = sched_setaffinity(pthread_gettid_np(thread_id), sizeof(cpu_set_t), cpuSet); + if (ret != RET_TP_OK) { + LOG_INFO("bind thread %d to cpu failed. ERROR %d", pthread_gettid_np(thread_id), ret); + return RET_TP_OK; + } +#endif +#else +#ifdef __APPLE__ + LOG_INFO("not bind thread to apple's cpu."); + return RET_TP_ERROR; +#else + int ret = pthread_setaffinity_np(thread_id, sizeof(cpu_set_t), cpuSet); + if (ret != RET_TP_OK) { + LOG_INFO("set thread: %lu to cpu failed", thread_id); + return RET_TP_SYSTEM_ERROR; + } +#endif // __APPLE__ +#endif + return RET_TP_OK; +} + +int BindMasterThread(int thread_pool_id, bool is_bind) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + cpu_set_t mask; + CPU_ZERO(&mask); + if (is_bind) { + unsigned int attach_id; + if (thread_pool->mode == MID_CPU) { + attach_id = cpu_cores[gHigNum + gMidNum - 1]; + } else { + attach_id = cpu_cores[0]; + } + LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id); + CPU_SET(attach_id, &mask); + } else { + for (int i = 0; i < gHigNum + gMidNum; ++i) { + CPU_SET(cpu_cores[i], &mask); + } + } + int ret = SetAffinity(pthread_self(), &mask); + if (ret != RET_TP_OK) { + 
LOG_INFO("set master thread affinity failed"); + return RET_TP_ERROR; + } + LOG_INFO("BindMasterThread success."); + return RET_TP_OK; +} + +int BindSalverThreads(int thread_pool_id, bool is_bind) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + cpu_set_t mask; + if (is_bind && thread_pool->mode != NO_BIND) { + unsigned int attach_id; + for (int i = 0; i < thread_pool->thread_num - 1; ++i) { + if (thread_pool->mode == MID_CPU) { + int core_id = gHigNum + gMidNum - i - 2; + if (core_id >= 0) { + attach_id = cpu_cores[core_id]; + } else { + attach_id = cpu_cores[0]; + } + } else { + attach_id = cpu_cores[i + 1]; + } + LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id); + CPU_ZERO(&mask); + CPU_SET(attach_id, &mask); + Thread *thread = GetThread(thread_pool_id, i); + if (thread == NULL) { + LOG_INFO("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + return false; + } + int ret = SetAffinity(thread->pthread, &mask); + if (ret != RET_TP_OK) { + LOG_INFO("set thread affinity failed"); + return RET_TP_ERROR; + } + } + } else { + CPU_ZERO(&mask); + for (int i = 0; i < gHigNum + gMidNum; ++i) { + CPU_SET(cpu_cores[i], &mask); + } + for (int i = 0; i < thread_pool->thread_num - 1; ++i) { + Thread *thread = GetThread(thread_pool_id, i); + if (thread == NULL) { + LOG_INFO("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + return false; + } + int ret = SetAffinity(thread->pthread, &mask); + if (ret != RET_TP_OK) { + LOG_INFO("set thread affinity failed"); + return RET_TP_ERROR; + } + } + } + LOG_INFO("BindSalverThreads success"); + return RET_TP_OK; +} +#endif + +int BindThreads(int thread_pool_id, bool is_bind, CpuBindMode mode) { +#ifdef BIND_CORE + if (mode == NO_BIND) { + return RET_TP_OK; + } + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get 
thread pool instane failed"); + return RET_TP_ERROR; + } + thread_pool->mode = mode; + int ret = BindMasterThread(thread_pool_id, is_bind); + if (ret != RET_TP_OK) { + LOG_INFO("bind master thread failed."); + } + ret = BindSalverThreads(thread_pool_id, is_bind); + if (ret != RET_TP_OK) { + LOG_INFO("bind salver thread failed."); + } + return ret; +#else + return RET_TP_OK; +#endif +} + +bool PushTaskToQueue(int thread_pool_id, int thread_id, Task *task) { + Thread *thread = GetThread(thread_pool_id, thread_id); + if (thread == NULL) { + LOG_INFO("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, thread_id); + return false; + } + const int tail_index = atomic_load_explicit(&thread->tail, memory_order_relaxed); + int next = (tail_index + 1) % MAX_TASK_NUM; + if (next == atomic_load_explicit(&thread->head, memory_order_acquire)) { + return false; + } + thread->task_list[tail_index] = task; + atomic_store_explicit(&thread->tail, next, memory_order_release); + atomic_fetch_add_explicit(&thread->task_size, 1, memory_order_relaxed); + // atomic_store_explicit(&thread->task_size, thread->task_size + 1, memory_order_relaxed); + sem_post(&thread->sem); + return true; +} + +bool PopTaskFromQueue(Thread *thread, Task **task) { + if (thread == NULL) { + LOG_INFO("thread is nullptr"); + return false; + } + if (thread->task_size == 0) { + return false; + } + const int head_index = atomic_load_explicit(&thread->head, memory_order_relaxed); + if (head_index == atomic_load_explicit(&thread->tail, memory_order_acquire)) { + return false; + } + *task = thread->task_list[head_index]; + atomic_store_explicit(&thread->head, (head_index + 1) % MAX_TASK_NUM, memory_order_release); + return true; +} + +void WaitAllThread(int thread_pool_id) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return; + } + bool k_success_flag = false; + while (!k_success_flag) { + k_success_flag = true; 
+ for (int i = 0; i < thread_pool->thread_num - 1; ++i) { + Thread *thread = GetThread(thread_pool_id, i); + if (thread == NULL) { + LOG_INFO("get thread failed, thread_pool_id: %d, thread_id: %d", thread_pool_id, i); + return; + } + if (thread->task_size != 0) { + k_success_flag = false; + break; + } + } + } +} + +int DistributeTask(int thread_pool_id, Task *task, int task_num) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + if (task_num > thread_pool->thread_num || task_num <= 1) { + LOG_INFO("invalid task num: %d, thread num: %d", task_num, thread_pool->thread_num); + return RET_TP_ERROR; + } + bool k_success_flag = false; + int size = thread_pool->thread_num < task_num ? thread_pool->thread_num : task_num; + for (int i = 0; i < size - 1; ++i) { + do { + k_success_flag = true; + if (!PushTaskToQueue(thread_pool_id, i, task)) { + k_success_flag = false; + } + } while (!k_success_flag); + } + // master thread + task->func(task->content, size - 1); + // wait + WaitAllThread(thread_pool_id); + return RET_TP_OK; +} + +int AddTask(int thread_pool_id, int func(void *, int), void *content, int task_num) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + // if single thread, run master thread + if (thread_pool->thread_num <= 1 || task_num <= 1) { + for (int i = 0; i < task_num; ++i) { + func(content, i); + } + return RET_TP_OK; + } + Task task; + task.func = func; + task.content = content; + return DistributeTask(thread_pool_id, &task, task_num); +} + +int ParallelLaunch(int thread_pool_id, int (*func)(void *, int), void *content, int task_num) { + return AddTask(thread_pool_id, func, content, task_num); +} + +void ThreadRun(Thread *thread) { + ThreadPool *thread_pool = GetInstance(thread->thread_pool_id); + if (thread_pool == NULL) { + 
LOG_INFO("get thread pool instane failed"); + return; + } + Task *task = NULL; + int thread_id = thread->thread_id; + int spin_count = 0; + thread->is_running = true; + while (thread_pool->is_alive) { + while (thread->activate) { + if (PopTaskFromQueue(thread, &task)) { + task->func(task->content, thread_id); + atomic_fetch_sub_explicit(&thread->task_size, 1, memory_order_relaxed); + // atomic_store_explicit(&thread->task_size, thread->task_size - 1, memory_order_relaxed); + spin_count = 0; + sem_trywait(&thread->sem); + } else { + sched_yield(); + spin_count++; + } + if (spin_count == DEFAULT_SPIN_COUNT) { + break; + } + } + sem_wait(&thread->sem); + } + thread->is_running = false; +} + +void PushThreadToList(int thread_pool_id, Thread *thread) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return; + } + ThreadList *thread_list = thread_pool->thread_list; + if (thread_list == NULL) { + LOG_INFO("thread list is null"); + DestroyThreadPool(thread_pool_id); + return; + } + pthread_mutex_lock(&thread_list->lock); + if (thread_list->size == 0) { + thread_list->head = thread; + thread_list->tail = thread; + } else { + thread_list->tail->next = thread; + thread_list->tail = thread; + } + thread_list->size++; + pthread_mutex_unlock(&thread_list->lock); +} + +int CreateNewThread(int thread_pool_id, int thread_id) { + LOG_INFO("thread_pool_id: %d, create thread: %d", thread_pool_id, thread_id); + Thread *thread = (Thread *)malloc(sizeof(Thread)); + if (thread == NULL) { + LOG_INFO("create thread failed"); + DestroyThreadPool(thread_pool_id); + return RET_TP_ERROR; + } + thread->thread_pool_id = thread_pool_id; + thread->thread_id = thread_id; + thread->head = ATOMIC_VAR_INIT(0); + thread->tail = ATOMIC_VAR_INIT(0); + thread->task_size = ATOMIC_VAR_INIT(0); + thread->activate = ATOMIC_VAR_INIT(true); + thread->is_running = ATOMIC_VAR_INIT(false); + thread->next = NULL; + 
sem_init(&thread->sem, 0, 0); + PushThreadToList(thread_pool_id, thread); + pthread_create(&thread->pthread, NULL, (void *)ThreadRun, thread); + pthread_detach(thread->pthread); + return RET_TP_OK; +} + +int ReConfigThreadPool(int thread_pool_id, int thread_num, CpuBindMode mode) { + LOG_INFO("reconfig thread pool, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, mode); + if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { + LOG_INFO("invalid thread num: %d", thread_num); + return RET_TP_ERROR; + } + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + if (thread_num <= thread_pool->thread_num) { + LOG_INFO("no need to add thread"); + return RET_TP_OK; + } + int curr_thread_num = thread_pool->thread_num; + thread_pool->thread_num = thread_num > MAX_THREAD_NUM ? MAX_THREAD_NUM : thread_num; + thread_pool->mode = mode; + if (thread_pool->thread_list == NULL) { + thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList)); + if (thread_pool->thread_list == NULL) { + LOG_INFO("create thread list failed"); + DestroyThreadPool(thread_pool_id); + return RET_TP_ERROR; + } + thread_pool->thread_list->head = NULL; + thread_pool->thread_list->tail = NULL; + thread_pool->thread_list->size = 0; + pthread_mutex_init(&thread_pool->thread_list->lock, NULL); + } + int add_thread_num = thread_pool->thread_num - curr_thread_num; + for (int i = curr_thread_num - 1, j = 0; j < add_thread_num; ++i, ++j) { + int ret = CreateNewThread(thread_pool_id, i); + if (ret != RET_TP_OK) { + LOG_INFO("create new thread failed"); + return RET_TP_ERROR; + } + } + return BindThreads(thread_pool_id, true, mode); +} + +int CreateThreadPool(int thread_pool_id, int thread_num, CpuBindMode mode) { + LOG_INFO("create thread pool, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, mode); + if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { + 
LOG_INFO("invalid thread num: %d", thread_num); + return RET_TP_ERROR; + } +#ifdef BIND_CORE + if (run_once) { + SortCpuProcessor(); + run_once = false; + } +#endif + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return RET_TP_ERROR; + } + thread_pool->thread_num = thread_num > MAX_THREAD_NUM ? MAX_THREAD_NUM : thread_num; + thread_pool->is_alive = ATOMIC_VAR_INIT(true); + thread_pool->mode = mode; + thread_pool->thread_list = NULL; + if (thread_num > 1) { + thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList)); + if (thread_pool->thread_list == NULL) { + LOG_INFO("create thread list failed"); + DestroyThreadPool(thread_pool_id); + return RET_TP_ERROR; + } + thread_pool->thread_list->head = NULL; + thread_pool->thread_list->tail = NULL; + thread_pool->thread_list->size = 0; + pthread_mutex_init(&thread_pool->thread_list->lock, NULL); + } + for (int i = 0; i < thread_pool->thread_num - 1; ++i) { + int ret = CreateNewThread(thread_pool_id, i); + if (ret != RET_TP_OK) { + LOG_INFO("create thread %d failed", i); + DestroyThreadPool(thread_pool_id); + return RET_TP_ERROR; + } + } + return RET_TP_OK; +} + +int ConfigThreadPool(int thread_pool_id, int thread_num, CpuBindMode mode) { + LOG_INFO("config: thread_pool_id: %d, thread_num: %d, mode: %d, is_created: %d, refcount: %d", thread_pool_id, + thread_num, mode, thread_pool_is_created[thread_pool_id], thread_pool_refcount[thread_pool_id]); + if (thread_pool_id >= MAX_THREAD_POOL_NUM) { + LOG_INFO("invalid context id: %d", thread_pool_id); + return RET_TP_ERROR; + } + if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { + LOG_INFO("invalid thread num: %d", thread_num); + return RET_TP_ERROR; + } + thread_pool_refcount[thread_pool_id] += 1; + int ret; + if (thread_pool_is_created[thread_pool_id]) { + ret = ReConfigThreadPool(thread_pool_id, thread_num, mode); + if (ret != RET_TP_OK) { + LOG_INFO("reconfig thread pool 
failed, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, + mode); + } + } else { + thread_pool_is_created[thread_pool_id] = true; + ret = CreateThreadPool(thread_pool_id, thread_num, mode); + if (ret != RET_TP_OK) { + LOG_INFO("create thread pool failed, thread_pool_id: %d, thread_num: %d, mode: %d", thread_pool_id, thread_num, + mode); + } + } + return ret; +} + +void ActivateThreadPool(int thread_pool_id) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return; + } + ThreadList *thread_list = thread_pool->thread_list; + if (thread_list == NULL) { + LOG_INFO("thread pool: %d list is null", thread_pool_id); + return; + } + Thread *thread = thread_list->head; + while (thread != NULL) { + sem_post(&thread->sem); + thread->activate = true; + thread = thread->next; + } +} + +void DeactivateThreadPool(int thread_pool_id) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return; + } + ThreadList *thread_list = thread_pool->thread_list; + if (thread_list == NULL) { + LOG_INFO("thread pool: %d list is null", thread_pool_id); + return; + } + Thread *thread = thread_list->head; + while (thread != NULL) { + thread->activate = false; + thread = thread->next; + } +} + +void DestroyThreadPool(int thread_pool_id) { + thread_pool_refcount[thread_pool_id]--; + if (thread_pool_refcount[thread_pool_id] > 0) { + LOG_INFO("no need to free, thread_pool_id: %d, refcount: %d", thread_pool_id, thread_pool_refcount[thread_pool_id]); + return; + } + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return; + } + if (thread_pool->thread_list == NULL) { + LOG_INFO("thread pool: %d list is null", thread_pool_id); + return; + } + DeactivateThreadPool(thread_pool_id); + thread_pool_is_created[thread_pool_id] = false; + 
thread_pool->is_alive = false; + for (int i = 0; i < thread_pool->thread_num - 1; ++i) { + Thread *thread = GetThread(thread_pool_id, i); + if (thread != NULL) { + FreeThread(thread_pool->thread_list, thread); + } + } + free(thread_pool->thread_list); + thread_pool->thread_list = NULL; + LOG_INFO("destroy thread pool success, thread_pool_id: %d, refcount: %d", thread_pool_id, + thread_pool_refcount[thread_pool_id]); +} + +int GetCurrentThreadNum(int thread_pool_id) { + ThreadPool *thread_pool = GetInstance(thread_pool_id); + if (thread_pool == NULL) { + LOG_INFO("get thread pool instane failed"); + return 0; + } + return thread_pool->thread_num; +} diff --git a/mindspore/lite/src/runtime/thread_pool.cc b/mindspore/lite/src/runtime/thread_pool.cc deleted file mode 100644 index ecbad2772f..0000000000 --- a/mindspore/lite/src/runtime/thread_pool.cc +++ /dev/null @@ -1,464 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "src/runtime/thread_pool.h" -#include -#include "utils/log_adapter.h" -#ifdef MS_COMPILE_IOS -#include -#include -#include -#endif // MS_COMPILE_IOS - -namespace mindspore { -namespace predict { -constexpr int kDefaultBigCount = 2; -constexpr int kDefaultMidCount = 2; -constexpr uint32_t kDefaultSpinCount = 300000; -constexpr int kSmallCpuNum = 4; -constexpr int kBigMidCpuNum = 4; -constexpr int kDefaultThreadNum = 1; -static unsigned int kDefaultMaxThreadNums = 8; -static unsigned int localMaxThreadNums = 1; -static ThreadPool globalThreadPool; - -ThreadPool *GlobalThreadPool() { return &globalThreadPool; } - -bool LiteQueue::Enqueue(ThreadPoolTask *task) { - const int tailIndex = tail.load(std::memory_order_relaxed); - // queue full - auto next = (tailIndex + 1) % kSingleThreadMaxTask; - if (next == head.load(std::memory_order_acquire)) { - return false; - } - buffer[tailIndex] = task; - tail.store(next, std::memory_order_release); - ++taskSize; - return true; -} - -bool LiteQueue::Dequeue(ThreadPoolTask **out) { - if (taskSize == 0) { - return false; - } - // queue empty - const int headIndex = head.load(std::memory_order_relaxed); - if (headIndex == tail.load(std::memory_order_acquire)) { - return false; - } - *out = buffer[headIndex]; - head.store((headIndex + 1) % kSingleThreadMaxTask, std::memory_order_release); - return true; -} - -bool LiteThreadBind::Bind(bool ifBind, int numThreads, bool master) { - if (master) { - if (!BindMasterThread(ifBind, bindModel)) { - MS_LOG(ERROR) << "bind msater thread failed"; - return false; - } - MS_LOG(DEBUG) << "bind master thread successful"; - } - if (numThreads > static_cast(sortedCpuIds.size())) { - MS_LOG(ERROR) << "thread num " << numThreads << " is larger than cores " << static_cast(sortedCpuIds.size()) - << " in the system"; - return true; - } - - if (!BindThreads(ifBind)) { - MS_LOG(ERROR) << "action " << ifBind << " thread failed"; - return false; - } - MS_LOG(DEBUG) << "action " << ifBind << " 
thread successful"; - return true; -} - -void LiteThreadBind::InitSortedCpuId() { - // mate10(970)|p20(970): 4big, 4small - // mate20(980)|p30(980)|mate30(990): 2big, 2mid, 4small - // note: p30's core 7 not allowed to be bind - int numCores = 0; -#ifdef MS_COMPILE_IOS - size_t len = sizeof(numCores); - sysctlbyname("hw.ncpu", &numCores, &len, NULL, 0); - numCores = numCores > 1 ? numCores : 1; -#else - numCores = static_cast(std::thread::hardware_concurrency()); -#endif // MS_COMPILE_IOS - if (numCores < 0) { - MS_LOG(ERROR) << "get numCores return invalid value: " << numCores; - sortedCpuIds.clear(); - return; - } - if (numCores < kBigMidCpuNum) { - bigCore = 0; - midCore = numCores; - } else { - bigCore = kDefaultBigCount; - midCore = kDefaultMidCount; - } - sortedCpuIds.clear(); - for (int i = numCores - 1; i >= 0; --i) { - sortedCpuIds.emplace_back(i); - } - if (sortedCpuIds.size() > kSmallCpuNum) { - sortedCpuIds.resize(bigCore + midCore); - } -} - -bool LiteThreadBind::BindMasterThread(bool bindFlag, int mode) { - std::vector cpu; - if (bindFlag) { - size_t cpuIndex; - if (mode == MID_CORE) { - cpuIndex = sortedCpuIds.size() - 1; - } else { - cpuIndex = 0; - } - cpu.emplace_back(sortedCpuIds[cpuIndex]); - } else { - // unbind master - cpu.assign(sortedCpuIds.begin(), sortedCpuIds.end()); - } - cpu_set_t cpuSet; -#ifndef CPU_SET - (void)memset(&cpuSet, 0, sizeof(cpu_set_t)); -#else - CPU_ZERO(&cpuSet); -#endif - for (auto coreId : cpu) { -#ifndef CPU_SET - CPU_SET_LOCAL(coreId, &cpuSet); -#else - CPU_SET(coreId, &cpuSet); -#endif - } - if (!SetCPUBind(pthread_self(), &cpuSet)) { - MS_LOG(ERROR) << "do master bind failed. 
mode: " << mode; - return false; - } - return true; -} - -bool LiteThreadBind::BindThreads(bool bindFlag) { - if (bindFlag && bindModel != NO_BIND) { - size_t bindNums = std::min(sortedCpuIds.size(), threadIdList.size()); - cpu_set_t cpuSet; - size_t coreIndex; - for (size_t i = 0; i < bindNums; ++i) { -#ifndef CPU_SET - (void)memset(&cpuSet, 0, sizeof(cpu_set_t)); -#else - CPU_ZERO(&cpuSet); -#endif - if (bindModel == MID_CORE) { - coreIndex = sortedCpuIds.size() - 2 - i; - } else { - coreIndex = i + 1; - } -#ifndef CPU_SET - CPU_SET_LOCAL(sortedCpuIds[coreIndex], &cpuSet); -#else - CPU_SET(sortedCpuIds[coreIndex], &cpuSet); -#endif - if (!SetCPUBind(threadIdList[i], &cpuSet)) { - MS_LOG(ERROR) << "do SetCPUBind failed"; - return false; - } - } - } else { - // unbind - size_t bindNums = std::min(sortedCpuIds.size(), threadIdList.size()); - cpu_set_t cpuSet; -#ifndef CPU_SET - (void)memset(&cpuSet, 0, sizeof(cpu_set_t)); -#else - CPU_ZERO(&cpuSet); -#endif - for (auto coreId : sortedCpuIds) { -#ifndef CPU_SET - CPU_SET_LOCAL(coreId, &cpuSet); -#else - CPU_SET(coreId, &cpuSet); -#endif - } - for (size_t i = 0; i < bindNums; ++i) { - if (!SetCPUBind(threadIdList[i], &cpuSet)) { - MS_LOG(ERROR) << "do SetCPUBind failed"; - return false; - } - } - } - return true; -} - -bool LiteThreadBind::SetCPUBind(pthread_t threadId, cpu_set_t *cpuSet) { -#if defined(__ANDROID__) -#if __ANDROID_API__ >= 21 - int ret = sched_setaffinity(pthread_gettid_np(threadId), sizeof(cpu_set_t), cpuSet); - if (ret != 0) { - MS_LOG(ERROR) << "bind thread " << threadId << "to cpu failed.ERROR " << ret; - } -#endif -#else -#ifdef __APPLE__ - MS_LOG(ERROR) << "not bind thread to apple's cpu."; - return false; -#else -#ifndef _WIN32 - int ret = pthread_setaffinity_np(threadId, sizeof(cpuSet), cpuSet); - if (ret != 0) { - MS_LOG(ERROR) << "bind thread " << threadId << " to cpu failed.ERROR " << ret; - return false; - } -#endif -#endif // __APPLE__ -#endif - return true; -} - -bool 
ThreadPool::SetThreadPool() { - std::lock_guard Lock(poolMutex); - if (configThreadNums <= 0) { - MS_LOG(WARNING) << "numThreads " << configThreadNums << ", must be greater than 0"; - configThreadNums = curThreadRunNums; - } - if (localMaxThreadNums == 0) { - localMaxThreadNums = 1; - } else if (localMaxThreadNums > kDefaultMaxThreadNums) { - localMaxThreadNums = kDefaultMaxThreadNums; - } - if (configThreadNums > static_cast(kDefaultMaxThreadNums)) { - configThreadNums = kDefaultMaxThreadNums; - } - int addNum = 0; - if (configThreadNums > static_cast(kDefaultMaxThreadNums)) { - addNum = configThreadNums - curThreadRunNums; - } else if (static_cast(localMaxThreadNums) > curThreadNums) { - addNum = localMaxThreadNums - curThreadNums; - } - AddNewThread(addNum); - if (curThreadRunNums > static_cast(localMaxThreadNums)) { - SubRunThread(localMaxThreadNums); - } else { - AddRunThread(localMaxThreadNums); - } - return true; -} - -void ThreadPool::AddNewThread(int newNums) { - for (int i = curThreadNums - 1, j = 0; j < newNums; ++i, ++j) { - auto active = new std::atomic_bool{true}; - auto queue = std::make_shared(); - threadList.emplace_back([this, i, active, queue]() { - ThreadPoolTask *task = nullptr; - uint32_t spin_count = 0; - while (!exitRun) { - while (*active) { - if (queue->Dequeue(&task)) { - auto ret = task->first(i + 1, task->second.tvmParam, task->second.cdata); - if (ret != 0) { - errorInfo.emplace_back(std::make_pair(i + 1, std::make_pair(false, ret))); - } - queue->taskSize--; - spin_count = 0; - } else { - ++spin_count; - } - if (spin_count == kDefaultSpinCount) { - *(activateList[i]) = false; - --curThreadRunNums; - spin_count = 0; - break; - } - std::this_thread::yield(); - } - std::unique_lock queueLock(tMutex); - queueReady.wait(queueLock, [active, this] { return exitRun || *active; }); - } - }); - activateList.emplace_back(active); - queueList.emplace_back(queue); - } - curThreadNums += newNums; - curThreadRunNums += newNums; -} - -bool 
ThreadPool::SetThreadCpuBind(bool ifBind, int mode, bool master) { - if (curThreadRunNums <= 0) { - MS_LOG(ERROR) << "no threads need to be bind, totalThreadNum : " << curThreadRunNums; - return false; - } - if (threadBind == nullptr) { - threadBind = std::unique_ptr(new LiteThreadBind()); - if (threadBind == nullptr) { - MS_LOG(ERROR) << "create threadBind failed"; - return false; - } - threadBind->threadIdList.resize(kDefaultMaxThreadNums); - threadBind->InitSortedCpuId(); - } - threadBind->threadIdList.clear(); - for (auto &it : threadList) { - threadBind->threadIdList.emplace_back(it.native_handle()); - } - threadBind->bindModel = static_cast(mode); - if (!threadBind->Bind(ifBind, curThreadRunNums, master)) { - MS_LOG(ERROR) << "bind failed"; - return false; - } - return true; -} - -bool ThreadPool::AddTask(WorkFun &&worker, void *cdata, int numTask) { - if (numTask <= 0) { - numTask = curThreadRunNums; - } - TvmEnv env{}; - env.num_task = numTask; - errorInfo.clear(); - // single task, run master thread - if (curThreadRunNums <= 1) { - for (int i = 0; i < numTask; ++i) { - int ret = worker(i, &env, cdata); - if (ret != 0) { - errorInfo.emplace_back(std::make_pair(0, std::make_pair(false, ret))); - } - } - return CheckResult(); - } - ThreadPoolTask task; - task.first = std::move(worker); - task.second.cdata = cdata; - task.second.tvmParam = &env; - return DistributeTask(&task, numTask); -} - -bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) { - auto taskOri = *task; - if (numTask > curThreadRunNums) { - task->first = [taskOri, numTask, this](int task_id, TvmEnv *penv, void *cdata) -> int { - for (int i = task_id; i < numTask; i += curThreadRunNums) { - int ret = taskOri.first(i, penv, cdata); - if (ret != 0) { - errorInfo.emplace_back(std::make_pair(i + 1, std::make_pair(false, ret))); - } - } - return 0; - }; - } - bool kSuccFlag; - auto size = std::min(curThreadRunNums, numTask); - for (int i = 0; i < size - 1; ++i) { - do { - kSuccFlag = 
true; - if (!queueList[i]->Enqueue(task)) { - std::this_thread::yield(); - kSuccFlag = false; - } - } while (!kSuccFlag); - } - // master thread - int ret = task->first(0, task->second.tvmParam, task->second.cdata); - if (ret != 0) { - errorInfo.emplace_back(std::make_pair(0, std::make_pair(false, ret))); - } - kSuccFlag = false; - while (!kSuccFlag) { - std::this_thread::yield(); - kSuccFlag = true; - for (int i = 0; i < curThreadRunNums - 1; ++i) { - if (queueList[i]->taskSize != 0) { - kSuccFlag = false; - break; - } - } - } - return CheckResult(); -} - -void ThreadPool::AddRunThread(int num) { - int activeNums = num - curThreadRunNums; - if (activeNums <= 0 || static_cast(activateList.size()) < activeNums) { - return; - } - for (int i = curThreadRunNums - 1, j = 0; j < activeNums; ++i, ++j) { - *activateList[i] = true; - } - std::lock_guard queueLock(tMutex); - queueReady.notify_all(); - curThreadRunNums = num; -} - -void ThreadPool::SubRunThread(int num) { - int deactiveNums = curThreadRunNums - num; - if (deactiveNums <= 0) { - return; - } - for (int i = num - 1, j = 0; j < deactiveNums; ++i, ++j) { - *activateList[i] = false; - } - curThreadRunNums = num; -} - -bool ThreadPool::CheckResult() { - bool kSuccFlag = true; - for (auto result : errorInfo) { - if (result.second.first) { - MS_LOG(ERROR) << "task " << result.first << " failed, error code is " << result.second.second; - kSuccFlag = false; - } - } - return kSuccFlag; -} - -bool ThreadPool::LaunchWork(WorkFun worker, void *cdata, int numTask) { - if (!SetThreadPool()) { - return false; - } - return AddTask(std::move(worker), cdata, numTask); -} - -bool ThreadPool::BindAllThreads(bool ifBind, int mode, bool master) { - if (!SetThreadPool()) { - return false; - } - return SetThreadCpuBind(ifBind, mode, master); -} - -void ThreadPool::ConfigThreadPool(int mode, int numThreads) { - configBindMode = mode; - configThreadNums = numThreads; -} - -void ThreadPool::ConfigMaxThreadNum(unsigned int num) { 
localMaxThreadNums = num; } - -ThreadPool::~ThreadPool() { - curThreadRunNums = static_cast(threadList.size() + 1); - exitRun = true; - SubRunThread(kDefaultThreadNum); - queueReady.notify_all(); - for (auto &it : threadList) { - if (it.joinable()) { - it.join(); - } - } - for (const auto &it : activateList) { - delete it; - } -} -} // namespace predict -} // namespace mindspore diff --git a/mindspore/lite/src/runtime/thread_pool.h b/mindspore/lite/src/runtime/thread_pool.h index 6670f7a932..29f15bf325 100644 --- a/mindspore/lite/src/runtime/thread_pool.h +++ b/mindspore/lite/src/runtime/thread_pool.h @@ -17,111 +17,53 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ #define MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "src/runtime/runtime_api.h" +#include +#include "include/thread_pool_config.h" -namespace mindspore { -namespace predict { -#ifndef CPU_SET -const int CPU_SETSIZE = 1024; -#define __NCPUBITS (8 * sizeof(uint64_t)) -typedef struct { - uint64_t __bits[CPU_SETSIZE / __NCPUBITS]; -} cpu_set_t; +/** + * create thread pool and init + * @param thread_num + * @param mode + */ +int ConfigThreadPool(int context_id, int thread_num, CpuBindMode mode); -#define CPU_SET_LOCAL(cpu, cpusetp) ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS))) -#endif +/** + * + * @param session_index, support multi session + * @param job + * @param content + * @param task_num + */ +int ParallelLaunch(int context_id, int (*job)(void *, int), void *content, int task_num); -constexpr int kSingleThreadMaxTask = 2; -using TvmEnv = LiteParallelGroupEnv; -using WorkFun = std::function; -using TaskParam = struct Param { - void *cdata; - TvmEnv *tvmParam; -}; -using ThreadPoolTask = std::pair; -enum AffinityMode : int { BIG_CORE = 1, MID_CORE = -1, NO_BIND = 0 }; +/** + * bind each thread to specified cpu core + * @param is_bind + * @param mode + */ +int 
BindThreads(int context_id, bool is_bind, CpuBindMode mode); -class LiteQueue { - public: - LiteQueue() = default; - ~LiteQueue() = default; - bool Enqueue(ThreadPoolTask *task); - bool Dequeue(ThreadPoolTask **out); - std::atomic_int taskSize = {0}; +/** + * activate the thread pool + * @param context_id + */ +void ActivateThreadPool(int context_id); - private: - std::atomic_int head = {0}; - std::atomic_int tail = {0}; - ThreadPoolTask *buffer[kSingleThreadMaxTask]{}; -}; +/** + * deactivate the thread pool + * @param context_id + */ +void DeactivateThreadPool(int context_id); -class LiteThreadBind { - public: - LiteThreadBind() = default; - ~LiteThreadBind() = default; - void InitSortedCpuId(); - bool Bind(bool ifBind, int numThreads, bool master); - AffinityMode bindModel = MID_CORE; - std::vector threadIdList; +/** + * + * @return current thread num + */ +int GetCurrentThreadNum(int context_id); - private: - bool BindMasterThread(bool bindFlag, int mode); - bool BindThreads(bool bindFlag); - bool SetCPUBind(pthread_t threadId, cpu_set_t *cpuSet); - int bigCore = 0; - int midCore = 0; - std::vector sortedCpuIds{}; -}; - -class ThreadPool { - public: - ThreadPool() = default; - ~ThreadPool(); - bool LaunchWork(WorkFun worker, void *cdata, int numTask); - void ConfigThreadPool(int mode, int numThreads); - void ConfigMaxThreadNum(unsigned int num); - bool BindAllThreads(bool ifBind, int mode, bool master = true); - ThreadPool(const ThreadPool &) = delete; - ThreadPool &operator=(const ThreadPool &) = delete; - - private: - bool SetThreadPool(); - void AddNewThread(int newNums); - bool SetThreadCpuBind(bool ifBind, int mode, bool master); - bool AddTask(WorkFun &&worker, void *cdata, int numTask); - bool DistributeTask(ThreadPoolTask *task, int numTask); - void AddRunThread(int num); - void SubRunThread(int num); - bool CheckResult(); - - std::mutex poolMutex; - std::mutex tMutex; - std::condition_variable queueReady; - std::atomic_bool exitRun = {false}; - 
std::vector activateList{}; - int curThreadNums = 1; - int curThreadRunNums = 1; - int configThreadNums = 1; - int configBindMode = -1; - std::vector threadList{}; - std::vector> queueList{}; - std::unique_ptr threadBind{nullptr}; - std::vector>> errorInfo{}; -}; - -ThreadPool* GlobalThreadPool(); -} // namespace predict -} // namespace mindspore +/** + * destroy thread pool, and release resource + */ +void DestroyThreadPool(int context_id); #endif // MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ - diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index 1a324cb850..c078f9c7b0 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -179,7 +179,7 @@ set(TEST_LITE_SRC ${KERNEL_OP_SRC} ${LITE_DIR}/src/runtime/allocator.cc ${LITE_DIR}/src/runtime/runtime_api.cc - ${LITE_DIR}/src/runtime/thread_pool.cc + ${LITE_DIR}/src/runtime/thread_pool.c ${LITE_DIR}/src/runtime/workspace_pool.cc ${LITE_DIR}/src/runtime/parallel_executor.cc ${LITE_DIR}/src/ir/tensor.cc diff --git a/mindspore/lite/test/ut/src/infer_test.cc b/mindspore/lite/test/ut/src/infer_test.cc index 9de1a2dde1..2cda4426d1 100644 --- a/mindspore/lite/test/ut/src/infer_test.cc +++ b/mindspore/lite/test/ut/src/infer_test.cc @@ -106,7 +106,7 @@ TEST_F(InferTest, TestConvNode) { meta_graph.reset(); content = nullptr; auto context = new lite::Context; - context->cpu_bind_mode_ = lite::NO_BIND; + context->cpu_bind_mode_ = NO_BIND; context->device_ctx_.type = lite::DT_CPU; context->thread_num_ = 4; auto session = session::LiteSession::CreateSession(context); @@ -205,7 +205,7 @@ TEST_F(InferTest, TestAddNode) { meta_graph.reset(); content = nullptr; auto context = new lite::Context; - context->cpu_bind_mode_ = lite::NO_BIND; + context->cpu_bind_mode_ = NO_BIND; context->device_ctx_.type = lite::DT_CPU; context->thread_num_ = 4; auto session = session::LiteSession::CreateSession(context); @@ -307,7 +307,7 @@ TEST_F(InferTest, TestParallelExecutor) { 
meta_graph.reset(); content = nullptr; auto context = new lite::Context; - context->cpu_bind_mode_ = lite::NO_BIND; + context->cpu_bind_mode_ = NO_BIND; context->device_ctx_.type = lite::DT_CPU; context->thread_num_ = 4; auto session = new SessionWithParallelExecutor(); @@ -348,7 +348,7 @@ TEST_F(InferTest, TestModel) { ASSERT_NE(nullptr, model); delete[] buf[0]; auto context = new lite::Context; - context->cpu_bind_mode_ = lite::NO_BIND; + context->cpu_bind_mode_ = NO_BIND; context->device_ctx_.type = lite::DT_CPU; context->thread_num_ = 4; auto session = session::LiteSession::CreateSession(context); diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index fe7b9e0836..da853e8781 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -68,7 +68,7 @@ if (WIN32) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/kernel_registry.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/graph_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/runtime_api.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/thread_pool.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/thread_pool.c ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/workspace_pool.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/runtime/allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/executor.cc @@ -122,7 +122,7 @@ set(LITE_SRC ${SRC_DIR}/common/ms_tensor_utils.cc ${SRC_DIR}/runtime/allocator.cc ${SRC_DIR}/runtime/runtime_api.cc - ${SRC_DIR}/runtime/thread_pool.cc + ${SRC_DIR}/runtime/thread_pool.c ${SRC_DIR}/runtime/workspace_pool.cc ${SRC_DIR}/ir/tensor.cc ${SRC_DIR}/context.cc