!18311 optimize parallel function call
Merge pull request !18311 from yangjie159/mindrt_thread
This commit is contained in:
commit
fe37f625ca
|
@ -305,4 +305,15 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {
|
|||
|
||||
// Support CPU backend to judge whether it supports Float16.
|
||||
bool InnerContext::IsSupportFloat16() const {
  // Report the value currently stored in fp16_flag_.
  return fp16_flag_;
}
|
||||
|
||||
ActorThreadPool *InnerContext::thread_pool() const {
  // Read-only accessor for the thread_pool_ member; the pointer may be
  // nullptr, so callers are expected to check it before use.
  return thread_pool_;
}
|
||||
|
||||
int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num) {
|
||||
ActorThreadPool *pool = static_cast<const lite::InnerContext *>(context)->thread_pool();
|
||||
if (pool == nullptr) {
|
||||
MS_LOG(ERROR) << "thread pool is nullptr";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
return pool->ParallelLaunch(func, content, task_num);
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -30,9 +30,6 @@
|
|||
|
||||
namespace mindspore::lite {
|
||||
struct InnerContext : public Context {
|
||||
public:
|
||||
ActorThreadPool *thread_pool_{nullptr};
|
||||
|
||||
public:
|
||||
InnerContext() = default;
|
||||
|
||||
|
@ -64,6 +61,8 @@ struct InnerContext : public Context {
|
|||
|
||||
int IsValid() const;
|
||||
|
||||
ActorThreadPool *thread_pool() const;
|
||||
|
||||
virtual ~InnerContext();
|
||||
|
||||
private:
|
||||
|
@ -83,6 +82,8 @@ struct InnerContext : public Context {
|
|||
|
||||
bool fp16_flag_ = false;
|
||||
|
||||
ActorThreadPool *thread_pool_{nullptr};
|
||||
|
||||
#ifdef ENABLE_ARM
|
||||
#ifndef MS_COMPILE_IOS
|
||||
CpuInfo *cpu_info_ = nullptr;
|
||||
|
@ -95,6 +96,9 @@ struct InnerContext : public Context {
|
|||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
// Launches `func` on the thread pool held by `context`, forwarding `content`
// and `task_num` to ActorThreadPool::ParallelLaunch and returning its result.
// `context` is cast to lite::InnerContext by the implementation; when that
// context has no thread pool the call logs an error and returns RET_NULL_PTR.
int ParallelLaunch(const Context *context, const Func &func, Content content, int task_num);
|
||||
|
||||
} // namespace mindspore::lite
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_INNER_CONTEXT_H
|
||||
|
|
|
@ -323,7 +323,7 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
|
|||
const lite::InnerContext *ctx) {
|
||||
std::vector<std::shared_ptr<LiteOpActor>> actors;
|
||||
std::unordered_map<size_t, AID> partial_map{};
|
||||
auto thread_pool = ctx->thread_pool_;
|
||||
auto thread_pool = ctx->thread_pool();
|
||||
if (thread_pool == nullptr) {
|
||||
MS_LOG(ERROR) << "thread pool is nullptr";
|
||||
return actors;
|
||||
|
|
|
@ -881,7 +881,7 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
|
|||
}
|
||||
|
||||
int LiteSession::InitGPURuntime() {
|
||||
ActorThreadPool *thread_pool = this->context_->thread_pool_;
|
||||
ActorThreadPool *thread_pool = this->context_->thread_pool();
|
||||
if (thread_pool == nullptr) {
|
||||
MS_LOG(ERROR) << "thread pool is nullptr";
|
||||
is_running_.store(false);
|
||||
|
|
|
@ -76,8 +76,7 @@ int ConstantOfShapeCPUKernel::Run() {
|
|||
}
|
||||
thread_stride_ = UP_DIV(param_->element_size_, thread_count);
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConstantOfShapeRun, this, thread_count);
|
||||
auto ret = ParallelLaunch(this->context_, ConstantOfShapeRun, this, thread_count);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -236,8 +236,7 @@ int DetectionPostProcessBaseCPUKernel::Run() {
|
|||
return status;
|
||||
}
|
||||
} else {
|
||||
status = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
|
||||
status = ParallelLaunch(this->context_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
|
||||
FreeAllocatedBuffer();
|
||||
|
|
|
@ -165,8 +165,7 @@ int RunPriorBox(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int PriorBoxCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(RunPriorBox, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, RunPriorBox, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -175,8 +175,7 @@ int QuantDTypeCastCPUKernel::Run() {
|
|||
uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c());
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(QuantDTypeCastRun, this, thread_n_num_);
|
||||
auto ret = ParallelLaunch(this->context_, QuantDTypeCastRun, this, thread_n_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
|
||||
if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&
|
||||
|
|
|
@ -70,8 +70,7 @@ int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
int ReshapeBaseCPUKernel::Run() {
|
||||
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
|
||||
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ReshapeRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ReshapeRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -82,8 +82,7 @@ int SliceCPUKernel::Run() {
|
|||
lite::DataTypeSize(in_tensors_.at(0)->data_type()));
|
||||
return RET_OK;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SliceLaunch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SliceLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -125,8 +125,7 @@ int SplitBaseCPUKernel::Run() {
|
|||
output_ptr_.at(i) = output_tensor->data_c();
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SplitRun, this, thread_n_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SplitRun, this, thread_n_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "split error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -117,8 +117,7 @@ int SplitWithOverlapBaseCPUKernel::Run() {
|
|||
inner_stride_ *= input_shape[i];
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SplitWithOverlapRun, this, context_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SplitWithOverlapRun, this, context_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ParallelLaunch for SplitWIthOverlapRun run fail. errorcode:[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -100,8 +100,7 @@ int StackBaseCPUKernel::Run() {
|
|||
}
|
||||
// run stack
|
||||
num_threads_ = MSMIN(UP_DIV(outer_size_, 64), op_parameter_->thread_num_);
|
||||
auto ret =
|
||||
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
|
||||
auto ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -162,8 +162,7 @@ int StridedSliceCPUKernel::FastRun() {
|
|||
}
|
||||
input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
|
||||
output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(StrideRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, StrideRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -128,8 +128,7 @@ int TileCPUKernel::SimpleTileImpl(int task_id) {
|
|||
}
|
||||
|
||||
int TileCPUKernel::RunSimpleTile() {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SimpleTile, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SimpleTile, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -103,8 +103,7 @@ int ActivationFp16CPUKernel::Run() {
|
|||
fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
|
||||
fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
||||
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ActivationFp16Run, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, ActivationFp16Run, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -88,8 +88,7 @@ int AddNFp16CPUKernel::Run() {
|
|||
in1_addr_ = input0_data;
|
||||
in2_addr_ = input1_data;
|
||||
out_addr_ = out_data;
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
@ -97,8 +96,7 @@ int AddNFp16CPUKernel::Run() {
|
|||
for (size_t i = 2; i < in_tensors_.size(); ++i) {
|
||||
in1_addr_ = reinterpret_cast<float16_t *>(in_tensors_[i]->MutableData());
|
||||
in2_addr_ = out_data;
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -168,8 +168,7 @@ int ArithmeticCompareFP16CPUKernel::Run() {
|
|||
FreeTmpBuffer();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticsRunFp16, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ArithmeticsRunFp16, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -182,8 +182,7 @@ int ArithmeticFP16CPUKernel::Run() {
|
|||
FreeFp16Buffer();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
|
||||
if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
|
||||
Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
|
||||
output_tensor->ElementsNum());
|
||||
|
|
|
@ -84,8 +84,7 @@ int ArithmeticSelfFp16CPUKernel::Run() {
|
|||
}
|
||||
output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -63,8 +63,7 @@ int BatchnormFp16CPUKernel::Run() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -131,8 +131,7 @@ int CastFp16CPUKernel::Run() {
|
|||
if (data_num_ == 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
return static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CastFp16Run, this, op_parameter_->thread_num_);
|
||||
return ParallelLaunch(this->context_, CastFp16Run, this, op_parameter_->thread_num_);
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>)
|
||||
|
|
|
@ -261,16 +261,14 @@ int Convolution1x1FP16CPUKernel::Run() {
|
|||
|
||||
int ret = RET_ERROR;
|
||||
if (multi_thread_by_hw_) {
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunHw, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunHw, this, thread_count_);
|
||||
} else {
|
||||
#ifdef ENABLE_ARM64
|
||||
RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
|
||||
#else
|
||||
RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
|
||||
#endif
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(Convolution1x1Fp16RunOc, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, Convolution1x1Fp16RunOc, this, thread_count_);
|
||||
}
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ParallelLaunch failed.";
|
||||
|
|
|
@ -117,8 +117,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
|
|||
}
|
||||
is_repack_ = false;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ConvDwFp16Run, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -169,8 +169,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
|
|||
}
|
||||
is_repack_ = false;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwSWFp16Run, this, conv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -160,8 +160,7 @@ int ConvolutionFP16CPUKernel::Run() {
|
|||
}
|
||||
is_repack_ = false;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionFp16Impl, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, ConvolutionFp16Impl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -237,8 +237,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
|
|||
}
|
||||
is_repack_ = false;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionWinogradFp16Impl, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, ConvolutionWinogradFp16Impl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -52,8 +52,7 @@ int CropFp16CPUKernel::Run() {
|
|||
input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
|
||||
output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CropFp16Run, this, crop_para_->thread_count_);
|
||||
auto ret = ParallelLaunch(this->context_, CropFp16Run, this, crop_para_->thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
|
||||
}
|
||||
|
|
|
@ -179,8 +179,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
|
|||
memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t));
|
||||
packed_output_ = output_ptr;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, DeconvDwFp16Run, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -222,8 +222,7 @@ int DeConvolutionFp16CPUKernel::Run() {
|
|||
|
||||
RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_);
|
||||
|
||||
error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvFp16Run, this, thread_count_);
|
||||
error_code = ParallelLaunch(this->context_, DeConvFp16Run, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
|
||||
}
|
||||
|
|
|
@ -399,15 +399,13 @@ int DeConvWinogradFp16CPUKernel::Run() {
|
|||
nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
|
||||
|
||||
::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t));
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvWgFp16Run, this, deconv_param_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeConvWgFp16Run failed!";
|
||||
return ret;
|
||||
}
|
||||
// post bias activate and nhwc
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvWgPostFp16Run, this, thread_num_hw_);
|
||||
ret = ParallelLaunch(this->context_, DeConvWgPostFp16Run, this, thread_num_hw_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeConvWgPostFp16Run failed!";
|
||||
return ret;
|
||||
|
|
|
@ -147,8 +147,7 @@ int GatherFp16CPUKernel::Run() {
|
|||
Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
|
||||
}
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(GatherRunFp16, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, GatherRunFp16, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -108,8 +108,7 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
|
|||
int InstanceNormFp16CPUKernel::Run() {
|
||||
src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
|
||||
dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(InstanceNormFp16Run, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -95,8 +95,7 @@ int LayerNormFp16CPUKernel::Run() {
|
|||
var_data_ =
|
||||
reinterpret_cast<float16_t *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float16_t)));
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LayerNormFp16Run, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, LayerNormFp16Run, this, op_parameter_->thread_num_);
|
||||
if (out_tensors_.size() != 3) {
|
||||
context_->allocator->Free(mean_data_);
|
||||
context_->allocator->Free(var_data_);
|
||||
|
|
|
@ -95,8 +95,7 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float r
|
|||
|
||||
int LogSoftmaxFp16CPUKernel::Run() {
|
||||
if (in_plane_size_ == 1) {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
|
||||
}
|
||||
|
|
|
@ -295,8 +295,7 @@ int MatmulBaseFP16CPUKernel::Run() {
|
|||
batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
|
||||
batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(MatmulBaseFP16Run, this, thread_count_);
|
||||
auto ret = ParallelLaunch(this->context_, MatmulBaseFP16Run, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
|
||||
return ret;
|
||||
|
|
|
@ -101,8 +101,7 @@ int PadFp16CPUKernel::Run() {
|
|||
output_[i] = pad_param_->constant_value_;
|
||||
}
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
@ -114,8 +113,7 @@ int PadFp16CPUKernel::Run() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -89,8 +89,7 @@ int PoolingFp16CPUKernel::Run() {
|
|||
fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
|
||||
fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
||||
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PoolingFp16Impl, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, PoolingFp16Impl, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -86,8 +86,7 @@ int PowerFp16CPUKernel::Run() {
|
|||
return ret;
|
||||
}
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PowerImplFp16, this, thread_count_);
|
||||
auto ret = ParallelLaunch(this->context_, PowerImplFp16, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -163,8 +163,7 @@ int QuantDTypeCastFp16CPUKernel::Run() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(QuantDTypeCastFP16Run, this, thread_n_num_);
|
||||
auto ret = ParallelLaunch(this->context_, QuantDTypeCastFP16Run, this, thread_n_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -91,8 +91,7 @@ int ReduceFp16CPUKernel::Run() {
|
|||
outer_size_ = outer_sizes_.at(i);
|
||||
inner_size_ = inner_sizes_.at(i);
|
||||
axis_size_ = axis_sizes_.at(i);
|
||||
auto error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
|
||||
auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
|
||||
|
@ -107,8 +106,7 @@ int ReduceFp16CPUKernel::Run() {
|
|||
outer_size_ = outer_sizes_.back();
|
||||
inner_size_ = inner_sizes_.back();
|
||||
axis_size_ = axis_sizes_.back();
|
||||
auto error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ReduceFp16Impl, this, op_parameter_->thread_num_);
|
||||
auto error_code = ParallelLaunch(this->context_, ReduceFp16Impl, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
FreeTmpBuffer();
|
||||
MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
|
||||
|
|
|
@ -117,8 +117,7 @@ int ScaleFp16CPUKernel::Run() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ScaleFp16Run, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, ScaleFp16Run, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
|
||||
FreeTmpBuffer();
|
||||
|
|
|
@ -63,8 +63,7 @@ int SliceFp16CPUKernel::Run() {
|
|||
DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16));
|
||||
return RET_OK;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SliceFp16Launch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SliceFp16Launch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -95,8 +95,7 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_
|
|||
|
||||
int SoftmaxFp16CPUKernel::Run() {
|
||||
if (in_plane_size_ == 1) {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, SoftmaxLastAxisFp16Run, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
|
||||
}
|
||||
|
|
|
@ -101,8 +101,7 @@ int StackFp16CPUKernel::Run() {
|
|||
}
|
||||
// run stack
|
||||
num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->op_parameter_->thread_num_);
|
||||
ret =
|
||||
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(StackRun, this, num_threads_);
|
||||
ret = ParallelLaunch(this->context_, StackRun, this, num_threads_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -94,8 +94,7 @@ int ActivationGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
|
|||
}
|
||||
|
||||
int ActivationGradCPUKernelFp16::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ActivationGradRunFp16, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, ActivationGradRunFp16, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -75,8 +75,7 @@ int ArithmeticGradRunFp16(void *cdata, int task_id, float lhs_scale, float rhs_s
|
|||
}
|
||||
|
||||
int ArithmeticGradCPUKernelFp16::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticGradRunFp16, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, ArithmeticGradRunFp16, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Arithmetic Grad function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -72,8 +72,7 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id, float lhs_scale, float r
|
|||
}
|
||||
|
||||
int ArithmeticSelfGradFp16CPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticSelfGradFp16Run, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, ArithmeticSelfGradFp16Run, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -83,8 +83,7 @@ int BiasGradFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
|
|||
}
|
||||
|
||||
int BiasGradCPUKernelFp16::Run() {
|
||||
int error_code =
|
||||
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(BiasGradFp16Run, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, BiasGradFp16Run, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -141,8 +141,7 @@ int BNGradCPUKernelFp16::Run() {
|
|||
stage_ = 0;
|
||||
thread_num_ = context_->thread_num_;
|
||||
if (thread_num_ == 1) {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(BNGradFp16Run, this, thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
@ -151,8 +150,7 @@ int BNGradCPUKernelFp16::Run() {
|
|||
const std::vector<int> threads = {thread_num_, 1, thread_num_};
|
||||
for (size_t stage = 0; stage < threads.size(); stage++) {
|
||||
stage_ = static_cast<int>(stage);
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(BNGradFp16Run, this, threads.at(stage));
|
||||
int error_code = ParallelLaunch(this->context_, BNGradFp16Run, this, threads.at(stage));
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -191,8 +191,7 @@ int ConvolutionGradFilterCPUKernelFp16::Run() {
|
|||
auto *out_dw = out_tensors_.at(0);
|
||||
auto dw_addr = reinterpret_cast<float16_t *>(out_dw->data_c());
|
||||
memset(dw_addr, 0, out_dw->Size());
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionGradFilterFp16Run, this, context_->thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, ConvolutionGradFilterFp16Run, this, context_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv filter function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -176,8 +176,7 @@ int ConvolutionGradInputCPUKernelFp16::Run() {
|
|||
auto *out_dx = out_tensors_.at(0);
|
||||
auto dx_addr = reinterpret_cast<float16_t *>(out_dx->data_c());
|
||||
memset(dx_addr, 0, sizeof(float16_t) * batch * in_ch * in_h * in_w);
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionGradInputFp16Run, this, context_->thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, ConvolutionGradInputFp16Run, this, context_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -82,8 +82,7 @@ int RunDropoutFp16Grad(void *cdata, int task_id, float lhs_scale, float rhs_scal
|
|||
}
|
||||
|
||||
int DropoutGradCPUKernelFp16::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(RunDropoutFp16Grad, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, RunDropoutFp16Grad, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Dropout Grad function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -96,8 +96,7 @@ int LayerNormF16GradRun(void *cdata, int task_id, float lhs_scale, float rhs_sca
|
|||
}
|
||||
|
||||
int LayerNormGradCPUKernelFp16::Run() {
|
||||
int error_code =
|
||||
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(LayerNormF16GradRun, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, LayerNormF16GradRun, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "LayerNorm function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -55,8 +55,7 @@ int NegGradCPUKernelFp16::DoNegGrad(int task_id) {
|
|||
int NegGradCPUKernelFp16::ReSize() { return RET_OK; }
|
||||
|
||||
int NegGradCPUKernelFp16::Run() {
|
||||
int ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(NegGradRun, this, thread_count_);
|
||||
int ret = ParallelLaunch(this->context_, NegGradRun, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "parallel launch fail!ret: " << ret;
|
||||
return ret;
|
||||
|
|
|
@ -99,8 +99,7 @@ int PoolingFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rhs_sca
|
|||
|
||||
int PoolingGradCPUKernelFp16::Run() {
|
||||
thread_num_ = context_->thread_num_;
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PoolingFp16GradImpl, this, thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, PoolingFp16GradImpl, this, thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -91,8 +91,7 @@ int ResizeGradCPUKernelFp16::Run() {
|
|||
auto out_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
|
||||
size_t elem_number = out_tensors_.at(0)->ElementsNum();
|
||||
std::fill(out_addr, out_addr + elem_number, 0.f);
|
||||
int error_code =
|
||||
static_cast<const lite::InnerContext *>(this->context_)->thread_pool_->ParallelLaunch(ResizeFp16GradRun, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, ResizeFp16GradRun, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "ResizeGradCPUKernelFp16 function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -123,8 +123,7 @@ int StridedSliceFp16GradImpl(void *cdata, int task_id, float lhs_scale, float rh
|
|||
}
|
||||
|
||||
int StridedSliceGradCPUKernelFp16::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(StridedSliceFp16GradImpl, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, StridedSliceFp16GradImpl, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -67,8 +67,7 @@ int UnsortedSegmentSumFp16Run(void *cdata, int task_id, float lhs_scale, float r
|
|||
}
|
||||
|
||||
int UnsortedSegmentSumCPUKernelFp16::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(UnsortedSegmentSumFp16Run, this, 1);
|
||||
int error_code = ParallelLaunch(this->context_, UnsortedSegmentSumFp16Run, this, 1);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Strided slice error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -107,8 +107,7 @@ int ActivationRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int ActivationCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ActivationRun, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, ActivationRun, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -121,8 +121,7 @@ int AdderCPUKernel::Run() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(AdderImpl, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, AdderImpl, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "adder error error_code[" << error_code << "]";
|
||||
FreeTmpBuffer();
|
||||
|
|
|
@ -88,8 +88,7 @@ int AddNCPUKernel::Run() {
|
|||
in1_addr_ = input0_data;
|
||||
in2_addr_ = input1_data;
|
||||
out_addr_ = output_data;
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
@ -97,8 +96,7 @@ int AddNCPUKernel::Run() {
|
|||
for (size_t i = 2; i < in_tensors_.size(); ++i) {
|
||||
in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
|
||||
in2_addr_ = output_data;
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(AddNLaunch, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, AddNLaunch, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -419,8 +419,7 @@ int ArithmeticCPUKernel::Run() {
|
|||
input1_ptr_ = in_tensors_[1]->data_c();
|
||||
}
|
||||
output_ptr_ = out_tensors_[0]->data_c();
|
||||
return static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticsRun, this, op_parameter_->thread_num_);
|
||||
return ParallelLaunch(this->context_, ArithmeticsRun, this, op_parameter_->thread_num_);
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>)
|
||||
|
|
|
@ -114,8 +114,7 @@ int ArithmeticSelfRun(void *cdata, int task_id, float lhs_scale, float rhs_scale
|
|||
}
|
||||
|
||||
int ArithmeticSelfCPUKernel::Run() {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -75,8 +75,7 @@ int BatchnormCPUKernel::InitConstTensor() {
|
|||
}
|
||||
|
||||
int BatchnormCPUKernel::Run() {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -140,8 +140,7 @@ int CastCPUKernel::Run() {
|
|||
if (data_num_ == 0) {
|
||||
return RET_OK;
|
||||
}
|
||||
return static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CastRun, this, op_parameter_->thread_num_);
|
||||
return ParallelLaunch(this->context_, CastRun, this, op_parameter_->thread_num_);
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>)
|
||||
|
|
|
@ -69,8 +69,7 @@ int ConcatRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int ConcatCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConcatRun, this, op_parameter_->thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, ConcatRun, this, op_parameter_->thread_num_);
|
||||
return error_code;
|
||||
}
|
||||
|
||||
|
|
|
@ -256,12 +256,10 @@ int Convolution1x1CPUKernel::Run() {
|
|||
}
|
||||
|
||||
if (multi_thread_by_hw_) {
|
||||
static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(Convolution1x1RunHw, this, thread_count_);
|
||||
ParallelLaunch(this->context_, Convolution1x1RunHw, this, thread_count_);
|
||||
} else {
|
||||
PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
|
||||
static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(Convolution1x1Run, this, thread_count_);
|
||||
ParallelLaunch(this->context_, Convolution1x1Run, this, thread_count_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -133,8 +133,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
|
|||
auto output_tensor = out_tensors_.at(kOutputIndex);
|
||||
output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
|
||||
MS_ASSERT(output_ptr_ != nullptr);
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ConvDw3x3Run, this, conv_param_->thread_num_);
|
||||
ctx_->allocator->Free(buffer_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
|
||||
|
|
|
@ -116,8 +116,7 @@ int ConvolutionDepthwiseCPUKernel::Run() {
|
|||
output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
|
||||
MS_ASSERT(output_ptr_ != nullptr);
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ConvDwRun, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -203,8 +203,7 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
|
|||
MS_ASSERT(output_ptr_ != nullptr);
|
||||
ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w);
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwIndirectRun, this, conv_param_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ConvDwIndirectRun, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -171,8 +171,7 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
|
|||
packed_output_ = output_ptr;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwSWRun, this, conv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, ConvDwSWRun, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -168,8 +168,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() {
|
|||
packed_output_ = output_ptr;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvDwSWAvxRun, this, conv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, ConvDwSWAvxRun, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConvDwSWAvxRun error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -151,8 +151,7 @@ int ConvolutionCPUKernel::Run() {
|
|||
PackWeight();
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionImpl, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, ConvolutionImpl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -183,8 +183,7 @@ int ConvolutionSWCPUKernel::Run() {
|
|||
FreeTmpBuffer();
|
||||
return ret;
|
||||
}
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionSWImpl, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, ConvolutionSWImpl, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
|
||||
FreeTmpBuffer();
|
||||
|
|
|
@ -224,8 +224,7 @@ int ConvolutionWinogradCPUKernel::Run() {
|
|||
}
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ConvolutionWinogradImpl, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, ConvolutionWinogradImpl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -158,8 +158,7 @@ int CropAndResizeCPUKernel::Run() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CropAndResizeImpl, this, op_parameter_->thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, CropAndResizeImpl, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]";
|
||||
FreeTmpBuffer();
|
||||
|
|
|
@ -61,8 +61,7 @@ int CropCPUKernel::Run() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CropLaunch, this, crop_para_->thread_count_);
|
||||
auto ret = ParallelLaunch(this->context_, CropLaunch, this, crop_para_->thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -136,8 +136,7 @@ int CumSumCPUKernel::DoCumsumInt(int task_id) {
|
|||
}
|
||||
|
||||
int CumSumCPUKernel::Run() {
|
||||
int ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(CumsumLaunch, this, op_parameter_->thread_num_);
|
||||
int ret = ParallelLaunch(this->context_, CumsumLaunch, this, op_parameter_->thread_num_);
|
||||
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
|
||||
|
|
|
@ -177,8 +177,7 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
|
|||
packed_output_ = output_addr;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeconvDwRun, this, conv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, DeconvDwRun, this, conv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -233,8 +233,7 @@ int DeConvolutionCPUKernel::Run() {
|
|||
RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
|
||||
#endif
|
||||
|
||||
error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvFp32Run, this, thread_count_);
|
||||
error_code = ParallelLaunch(this->context_, DeConvFp32Run, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
|
||||
FreeRunBuf();
|
||||
|
|
|
@ -411,8 +411,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
|
|||
nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
|
||||
|
||||
::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float));
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvWgFp32Run, this, deconv_param_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, DeConvWgFp32Run, this, deconv_param_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
FreeRunBuf();
|
||||
MS_LOG(ERROR) << "DeConvWgFp32Run failed!";
|
||||
|
@ -420,8 +419,7 @@ int DeConvolutionWinogradCPUKernel::Run() {
|
|||
}
|
||||
|
||||
/* post bias activate and nhwc */
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(DeConvWgPostFp32Run, this, thread_num_hw_);
|
||||
ret = ParallelLaunch(this->context_, DeConvWgPostFp32Run, this, thread_num_hw_);
|
||||
if (ret != RET_OK) {
|
||||
FreeRunBuf();
|
||||
MS_LOG(ERROR) << "DeConvWgPostFp32Run failed!";
|
||||
|
|
|
@ -58,8 +58,7 @@ int EluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int EluCPUKernel::Run() {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(EluRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, EluRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -86,8 +86,7 @@ int EmbeddingLookupCPUKernel::Run() {
|
|||
memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
|
||||
dest_loc += in_tensors_.at(i)->ElementsNum();
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(EmbeddingLookupRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
|
||||
FreeRunBuff();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
|
||||
|
|
|
@ -72,8 +72,7 @@ int ExpCPUKernel::Run() {
|
|||
output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
|
||||
exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
|
||||
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(ExpRun, this, exp_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, ExpRun, this, exp_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -90,8 +90,7 @@ int FillCPUKernel::Run() {
|
|||
MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(FillRun, this, thread_sz_count_);
|
||||
auto ret = ParallelLaunch(this->context_, FillRun, this, thread_sz_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -91,8 +91,7 @@ int FusedBatchnormCPUKernel::Run() {
|
|||
|
||||
trained_ = true; // trained at least once
|
||||
}
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, BatchNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -127,8 +127,7 @@ int GatherNdCPUKernel::Run() {
|
|||
in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
|
||||
out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
|
||||
InitOffset();
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(GatherNdRun, this, thread_sz_count_);
|
||||
auto ret = ParallelLaunch(this->context_, GatherNdRun, this, thread_sz_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -91,8 +91,7 @@ int GatherCPUKernel::Run() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(GatherRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, GatherRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
|
||||
}
|
||||
|
|
|
@ -66,8 +66,7 @@ int InstanceNormCPUKernel::Run() {
|
|||
gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
|
||||
beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c());
|
||||
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(InstanceNormRun, this, op_parameter_->thread_num_);
|
||||
auto ret = ParallelLaunch(this->context_, InstanceNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
|
||||
return ret;
|
||||
|
|
|
@ -146,8 +146,7 @@ int L2NormCPUKernel::Run() {
|
|||
int ret;
|
||||
if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
|
||||
// all axis
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(SquareSumRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, SquareSumRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
@ -157,15 +156,13 @@ int L2NormCPUKernel::Run() {
|
|||
sum += tmp_sum_[i];
|
||||
}
|
||||
sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(L2NormRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, L2NormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, L2NormTrailingAxisRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -92,8 +92,7 @@ int LayerNormCPUKernel::Run() {
|
|||
mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
|
||||
var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LayerNormRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, LayerNormRun, this, op_parameter_->thread_num_);
|
||||
if (out_tensors_.size() != 3) {
|
||||
context_->allocator->Free(mean_data_);
|
||||
context_->allocator->Free(var_data_);
|
||||
|
|
|
@ -72,8 +72,7 @@ int LocalResponseNormRun(void *cdata, int task_id, float lhs_scale, float rhs_sc
|
|||
}
|
||||
|
||||
int LocalResponseNormCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LocalResponseNormRun, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, LocalResponseNormRun, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -96,8 +96,7 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id, float lhs_scale, float rhs_s
|
|||
int LogSoftmaxCPUKernel::Run() {
|
||||
int ret = RET_OK;
|
||||
if (in_plane_size_ == 1) {
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, LogSoftmaxLastAxisRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
|
||||
}
|
||||
|
|
|
@ -60,8 +60,7 @@ int LshProjectionCPUKernel::Run() {
|
|||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(LshProjectionRun, this, op_parameter_->thread_num_);
|
||||
ret = ParallelLaunch(this->context_, LshProjectionRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
|
||||
}
|
||||
|
|
|
@ -426,8 +426,7 @@ int MatmulFp32BaseCPUKernel::Run() {
|
|||
// need not aligned
|
||||
batch_c_ptr_ = output_data_ + i * params_->row_ * params_->col_;
|
||||
}
|
||||
ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(MatmulBaseFloatRun, this, thread_count_);
|
||||
ret = ParallelLaunch(this->context_, MatmulBaseFloatRun, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
|
||||
}
|
||||
|
|
|
@ -180,8 +180,7 @@ int OneHotCPUKernel::GetParams() {
|
|||
}
|
||||
|
||||
int OneHotCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(RunOneHot, this, op_parameter_->thread_num_);
|
||||
int error_code = ParallelLaunch(this->context_, RunOneHot, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -408,8 +408,7 @@ int PadCPUKernel::Run() {
|
|||
output_data[i] = pad_param_->constant_value_;
|
||||
}
|
||||
}
|
||||
error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PadImpl, this, op_parameter_->thread_num_);
|
||||
error_code = ParallelLaunch(this->context_, PadImpl, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
@ -422,8 +421,7 @@ int PadCPUKernel::Run() {
|
|||
return error_code;
|
||||
}
|
||||
|
||||
error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(MirrorPadImpl, this, op_parameter_->thread_num_);
|
||||
error_code = ParallelLaunch(this->context_, MirrorPadImpl, this, op_parameter_->thread_num_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -84,8 +84,7 @@ int PoolingImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int PoolingCPUKernel::Run() {
|
||||
int error_code = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PoolingImpl, this, thread_count_);
|
||||
int error_code = ParallelLaunch(this->context_, PoolingImpl, this, thread_count_);
|
||||
if (error_code != RET_OK) {
|
||||
MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -40,8 +40,7 @@ int PowerImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
|
|||
}
|
||||
|
||||
int PowerCPUKernel::Run() {
|
||||
auto ret = static_cast<const lite::InnerContext *>(this->context_)
|
||||
->thread_pool_->ParallelLaunch(PowerImpl, this, thread_count_);
|
||||
auto ret = ParallelLaunch(this->context_, PowerImpl, this, thread_count_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;
|
||||
return RET_ERROR;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue