!15176 [MS][LITE] Change lite_kernel.h interface of InnerContext

From: @gongdaguo
Reviewed-by: @jpc_chenjianping, @zhanghaibo5
Signed-off-by: @jpc_chenjianping
Authored by mindspore-ci-bot on 2021-04-15 19:41:36 +08:00, committed by Gitee
Commit: c71ae4e831
181 changed files with 580 additions and 328 deletions
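
At a glance, the change is an API-boundary cleanup: lite_kernel.h now exposes only the public lite::Context in the LiteKernel constructor, the context() accessor, the context_ member, and the KernelCreator/LiteKernelCreator signatures, while implementation files cast back to the internal lite::InnerContext wherever they need members such as thread_pool_, thread_num_, or the allocator. Below is a minimal sketch of that cast-back idiom, not code from the commit: the AsInnerContext helper is hypothetical (the diff writes the static_cast inline at every call site) and the inner_context.h include path is assumed.

// Sketch only: the public signature takes lite::Context, and internals are
// recovered with a static_cast, exactly as the call sites in this diff do inline.
#include "include/context.h"    // public lite::Context
#include "src/inner_context.h"  // internal lite::InnerContext (path assumed)

namespace mindspore::kernel {
// Hypothetical helper; not part of the commit.
inline const lite::InnerContext *AsInnerContext(const lite::Context *ctx) {
  // Safe only because the runtime always hands kernels an InnerContext instance.
  return static_cast<const lite::InnerContext *>(ctx);
}
}  // namespace mindspore::kernel

With such a helper, a call like ParallelLaunch(AsInnerContext(context_)->thread_pool_, Task, this, n) would be equivalent to the inline casts repeated throughout the hunks below.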

View File

@@ -841,7 +841,7 @@ table Rsqrt {
 }

 table QuantDTypeCast {
-    src_t: long;  // deprecated
+    src_t: long;
     dst_t: long;
 }

View File

@@ -30,6 +30,7 @@
 #include "src/tensor.h"
 #include "include/errorcode.h"
 #include "schema/model_generated.h"
+#include "include/context.h"

 namespace mindspore::kernel {
 enum KERNEL_ARCH {
@@ -64,7 +65,7 @@ class LiteKernel {
  public:
   LiteKernel() = default;
   LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
-             const lite::InnerContext *ctx)
+             const lite::Context *ctx)
       : op_parameter_(parameter),
         in_tensors_(std::move(in_tensors)),
         out_tensors_(std::move(out_tensors)),
@@ -175,7 +176,7 @@ class LiteKernel {
   SubGraphType subgraph_type() const { return this->subgraph_type_; }

-  const lite::InnerContext *context() const { return this->context_; }
+  const lite::Context *context() const { return this->context_; }

   virtual std::string ToString() const;
@@ -202,7 +203,7 @@ class LiteKernel {
   // tensor will free in ~lite_session()
   std::vector<lite::Tensor *> in_tensors_;
   std::vector<lite::Tensor *> out_tensors_;
-  const lite::InnerContext *context_ = nullptr;
+  const lite::Context *context_ = nullptr;
   std::vector<LiteKernel *> in_kernels_;
   std::vector<LiteKernel *> out_kernels_;
   bool train_mode_ = false;
@@ -217,13 +218,13 @@ class LiteKernel {
 typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
                                      const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                     const lite::InnerContext *ctx, const KernelKey &desc);
+                                     const lite::Context *ctx, const KernelKey &desc);

 template <class T>
 kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                      const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
-  auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, ctx);
+                                      const lite::Context *ctx, const kernel::KernelKey &desc) {
+  auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
     free(parameter);
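
Taken together with the hunk above, the practical effect on every kernel below is the same one-line change at each ParallelLaunch or fp16-conversion call: context_ is now stored as a lite::Context pointer, so the implementation casts it back before touching InnerContext members. A hedged sketch with a hypothetical kernel (MyKernel and MyRun are placeholders, not names from the diff):

// Placeholder kernel showing the call-site pattern repeated in the files below.
int MyKernel::Run() {
  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MyRun, this,
                            op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "MyRun error error_code[" << ret << "]";
  }
  return ret;
}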

View File

@@ -206,7 +206,8 @@ int SubGraphNpuKernel::Init() {
   MS_ASSERT(npu_manager_ != nullptr);

-  npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
+  npu_manager_->AddModel(model_buffer_data, GetOMModelName(),
+                         static_cast<const lite::InnerContext *>(context_)->GetNpuInfo().frequency_);

   executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);

View File

@@ -73,7 +73,8 @@ int ConstantOfShapeCPUKernel::Run() {
   int thread_count = MSMIN(op_parameter_->thread_num_, param_->element_size_);
   thread_stride_ = UP_DIV(param_->element_size_, thread_count);
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConstantOfShapeRun, this, thread_count);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConstantOfShapeRun,
+                            this, thread_count);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]";
     return ret;

View File

@@ -144,17 +144,7 @@ void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
   }
 }

-int DetectionPostProcessBaseCPUKernel::Run() {
-  MS_ASSERT(context_->allocator != nullptr);
-  int status = GetInputData();
-  if (status != RET_OK) {
-    return status;
-  }
-  auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c());
-  auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c());
-  auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c());
+int DetectionPostProcessBaseCPUKernel::ParamInit() {
   num_boxes_ = in_tensors_.at(0)->shape().at(1);
   num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2);
   params_->decoded_boxes_ = context_->allocator->Malloc(num_boxes_ * 4 * sizeof(float));
@@ -221,6 +211,24 @@ int DetectionPostProcessBaseCPUKernel::Run() {
       return RET_ERROR;
     }
   }
+  return RET_OK;
+}
+
+int DetectionPostProcessBaseCPUKernel::Run() {
+  MS_ASSERT(context_->allocator != nullptr);
+  int status = GetInputData();
+  if (status != RET_OK) {
+    return status;
+  }
+  auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
+  auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c());
+  auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c());
+  auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c());
+  if (ParamInit() != RET_OK) {
+    MS_LOG(ERROR) << "ParamInit error";
+    return status;
+  }
   status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_);
   if (status != RET_OK) {
@@ -238,7 +246,8 @@ int DetectionPostProcessBaseCPUKernel::Run() {
       return status;
     }
   } else {
-    status = ParallelLaunch(this->context_->thread_pool_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
+    status = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                            NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
       FreeAllocatedBuffer();

View File

@@ -47,6 +47,7 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {
  protected:
   virtual int GetInputData() = 0;
+  int ParamInit();

  private:
   void FreeAllocatedBuffer();

View File

@@ -166,7 +166,8 @@ int RunPriorBox(void *cdata, int task_id) {
 }

 int PriorBoxCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunPriorBox,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -172,7 +172,8 @@ int QuantDTypeCastCPUKernel::Run() {
     uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c());
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, QuantDTypeCastRun,
+                            this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&

View File

@@ -66,7 +66,8 @@ int ReshapeRun(void *cdata, int task_id) {
 int ReshapeBaseCPUKernel::Run() {
   input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
   output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReshapeRun, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
     return ret;

View File

@@ -81,7 +81,8 @@ int SliceCPUKernel::Run() {
                        lite::DataTypeSize(in_tensors_.at(0)->data_type()));
     return RET_OK;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceLaunch, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
     return RET_ERROR;

View File

@@ -120,7 +120,8 @@ int SplitBaseCPUKernel::Run() {
     output_ptr_.at(i) = output_tensor->data_c();
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SplitRun, this,
+                            thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "split error error_code[" << ret << "]";
   }

View File

@@ -100,7 +100,8 @@ int StackBaseCPUKernel::Run() {
   }
   // run stack
   num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_);
-  auto ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_);
+  auto ret =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
     return RET_ERROR;

View File

@@ -157,7 +157,8 @@ int StridedSliceCPUKernel::FastRun() {
   }
   input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
   output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StrideRun, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
     return ret;

View File

@@ -127,7 +127,8 @@ int TileCPUKernel::SimpleTileImpl(int task_id) {
 }

 int TileCPUKernel::RunSimpleTile() {
-  auto ret = ParallelLaunch(context_->thread_pool_, SimpleTile, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SimpleTile, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
     return ret;

View File

@@ -100,7 +100,8 @@ int ActivationFp16CPUKernel::Run() {
   fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationFp16Run, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -160,15 +160,16 @@ int ArithmeticCompareFP16CPUKernel::Run() {
   is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
   is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32;

-  input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
-  input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+  input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
+  input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->MutableData());
   if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRunFp16,
+                            this, context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
   }

View File

@@ -127,13 +127,13 @@ void ArithmeticFP16CPUKernel::InitRunFunction(int primitive_type) {
 int ArithmeticFP16CPUKernel::ConstTensorBroadCast() {
   int ret;
   if (in_tensors_[0]->data_c() != nullptr) {
-    ret = ConvertFp32TensorToFp16(in_tensors_[0], context_);
+    ret = ConvertFp32TensorToFp16(in_tensors_[0], static_cast<const lite::InnerContext *>(this->context_));
     if (ret != RET_OK) {
       return ret;
     }
   }
   if (in_tensors_[1]->data_c() != nullptr) {
-    ret = ConvertFp32TensorToFp16(in_tensors_[1], context_);
+    ret = ConvertFp32TensorToFp16(in_tensors_[1], static_cast<const lite::InnerContext *>(this->context_));
     if (ret != RET_OK) {
       return ret;
     }
@@ -167,18 +167,19 @@ int ArithmeticFP16CPUKernel::Run() {
     return RET_ERROR;
   }
   if (!input0_broadcast_) {
-    input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
+    input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
   }
   if (!input1_broadcast_) {
-    input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+    input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   }
   auto output_tensor = out_tensors_.at(0);
-  output_ptr_ = MallocOutputFp16(output_tensor, context_);
+  output_ptr_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_));
   if (input0_ptr_ == nullptr || input1_ptr_ == nullptr || output_ptr_ == nullptr) {
     FreeFp16Buffer();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this,
+                            context_->thread_num_);
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
     Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
                      output_tensor->ElementsNum());

View File

@@ -77,13 +77,14 @@ int ArithmeticSelfFp16CPUKernel::Run() {
   auto output_tensor = out_tensors_.at(0);

   if (input_tensor->data_type() == kNumberTypeFloat32) {
-    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_);
+    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_));
   } else {
     input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   }
   output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }

View File

@@ -51,15 +51,16 @@ int BatchnormFp16CPUKernel::InitConstTensor() {
 int BatchnormFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-  input_ = ConvertInputFp32toFp16(input_tensor, context_);
-  output_ = MallocOutputFp16(output_tensor, context_);
+  input_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_));
+  output_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_));
   if (input_ == nullptr || output_ == nullptr) {
     FreeInputAndOutput();
     MS_LOG(ERROR) << "input or output is nullptr";
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }

View File

@@ -132,7 +132,8 @@ int CastFp16CPUKernel::Run() {
   if (data_num_ == 0) {
     return RET_OK;
   }
-  return ParallelLaunch(this->context_->thread_pool_, CastFp16Run, this, op_parameter_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastFp16Run, this,
+                        op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>)

View File

@@ -236,14 +236,16 @@ int Convolution1x1FP16CPUKernel::Run() {
   int ret = RET_ERROR;
   if (multi_thread_by_hw_) {
-    ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                         Convolution1x1Fp16RunHw, this, thread_count_);
   } else {
 #ifdef ENABLE_ARM64
     RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #else
     RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif
-    ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                         Convolution1x1Fp16RunOc, this, thread_count_);
   }
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ParallelLaunch failed.";

View File

@@ -95,10 +95,11 @@ static void SetInputOutputShapeInfo(ConvParameter *conv_param, lite::Tensor *inp
 int ConvolutionDelegateFP16CPUKernel::ReSize() {
   // Update shape info of input and output
   kernel::SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(),
-                                  out_tensors_.front(), context_);
+                                  out_tensors_.front(), static_cast<const lite::InnerContext *>(this->context_));
   if (fp16_conv_kernel_ == nullptr) {
     fp16_conv_kernel_ =
-      CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, context_, origin_weight_, origin_bias_);
+      CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_,
+                              static_cast<const lite::InnerContext *>(context_), origin_weight_, origin_bias_);
     if (fp16_conv_kernel_ == nullptr) {
       MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr.";
       return RET_ERROR;
@@ -184,7 +185,7 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
 /* creator func */
 kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
@@ -200,11 +201,12 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
   kernel::LiteKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
-    kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs,
+                                                                         static_cast<const lite::InnerContext *>(ctx));
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx);
+    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx));
   } else {
-    kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx);
+    kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx));
   }

   if (kernel == nullptr) {

View File

@@ -104,7 +104,8 @@ static int ConvDwFp16Run(void *cdata, int task_id) {
 }

 int ConvolutionDepthwiseFp16CPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwFp16Run, this,
+                            conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
   }

View File

@@ -155,7 +155,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     packed_output_ = output_ptr;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWFp16Run, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
   }

View File

@@ -144,7 +144,8 @@ int ConvolutionFP16CPUKernel::Run() {
     return RET_ERROR;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionFp16Impl, this,
+                       thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
   }

View File

@@ -213,7 +213,8 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
     return RET_ERROR;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                       ConvolutionWinogradFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }

View File

@@ -53,7 +53,8 @@ int CropFp16CPUKernel::Run() {
   input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropFp16Run, this,
+                            crop_para_->thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
   }

View File

@@ -173,7 +173,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t));
     packed_output_ = output_ptr;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwFp16Run, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
   }

View File

@@ -217,7 +217,8 @@ int DeConvolutionFp16CPUKernel::Run() {
     RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_);

-    error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp16Run,
+                                this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }
@@ -229,7 +230,7 @@ int DeConvolutionFp16CPUKernel::Run() {
 kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                               const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                               const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
@@ -238,12 +239,15 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *>
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
         (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) {
-      kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs,
+                                                                      static_cast<const lite::InnerContext *>(ctx));
     } else {
-      kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow)
+        kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
     }
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow)
+      DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   }

   if (kernel == nullptr) {

View File

@@ -392,10 +392,12 @@ int DeConvWinogradFp16CPUKernel::Run() {
     nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;

     ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t));
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp16Run, this,
+                   deconv_param_->thread_num_);

     /*post bias activate and nhwc */
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp16Run, this,
+                   thread_num_hw_);
   }

   return RET_OK;

View File

@@ -151,7 +151,8 @@ int GatherFp16CPUKernel::Run() {
       Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
     }
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, GatherRunFp16, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRunFp16, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
   }

View File

@@ -109,7 +109,8 @@ int InstanceNormFp16Run(void *cdata, int task_id) {
 int InstanceNormFp16CPUKernel::Run() {
   src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
   dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormFp16Run,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
     return ret;

View File

@@ -95,7 +95,8 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id) {
 int LogSoftmaxFp16CPUKernel::Run() {
   if (in_plane_size_ == 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisFp16Run, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              LogSoftmaxLastAxisFp16Run, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
     }

View File

@@ -286,7 +286,8 @@ int MatmulBaseFP16CPUKernel::Run() {
     batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
     batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFP16Run, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFP16Run,
+                            this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
     return ret;

View File

@@ -89,7 +89,8 @@ int PadFp16CPUKernel::Run() {
         output_[i] = pad_param_->constant_value_;
       }
     }
-    ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this,
+                         op_parameter_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
    }
@@ -101,7 +102,8 @@ int PadFp16CPUKernel::Run() {
      return ret;
    }

-    ret = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl, this,
+                         context_->thread_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
    }

View File

@@ -90,7 +90,8 @@ int PoolingFp16CPUKernel::Run() {
   fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingFp16Impl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  PoolingFp16Impl, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -87,7 +87,8 @@ int PowerFp16CPUKernel::Run() {
       return ret;
     }
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImplFp16, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImplFp16, this,
+                            thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret;
     return RET_ERROR;

View File

@@ -164,7 +164,8 @@ int QuantDTypeCastFp16CPUKernel::Run() {
     return RET_ERROR;
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastFP16Run, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                            QuantDTypeCastFP16Run, this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;

View File

@@ -93,7 +93,8 @@ int ReduceFp16CPUKernel::Run() {
     outer_size_ = outer_sizes_.at(i);
     inner_size_ = inner_sizes_.at(i);
     axis_size_ = axis_sizes_.at(i);
-    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_);
+    auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                     ReduceFp16Impl, this, context_->thread_num_);
     if (error_code != RET_OK) {
       FreeTmpBuffer();
       MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
@@ -108,7 +109,8 @@ int ReduceFp16CPUKernel::Run() {
   outer_size_ = outer_sizes_.back();
   inner_size_ = inner_sizes_.back();
   axis_size_ = axis_sizes_.back();
-  auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_);
+  auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                   ReduceFp16Impl, this, context_->thread_num_);
   if (error_code != RET_OK) {
     FreeTmpBuffer();
     MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";

View File

@@ -115,7 +115,8 @@ int ScaleFp16CPUKernel::Run() {
     return ret;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ScaleFp16Run, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleFp16Run, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     FreeTmpBuffer();
@@ -127,12 +128,12 @@ int ScaleFp16CPUKernel::Run() {
 }

 int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
-  scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+  scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   if (scale_ == nullptr) {
     return RET_ERROR;
   }
   if (in_tensors_.size() == 3) {
-    offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), context_);
+    offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), static_cast<const lite::InnerContext *>(this->context_));
     if (offset_ == nullptr) {
       return RET_ERROR;
     }

View File

@@ -63,7 +63,8 @@ int SliceFp16CPUKernel::Run() {
     DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16));
     return RET_OK;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, SliceFp16Launch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceFp16Launch,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret;
     return RET_ERROR;

View File

@@ -95,7 +95,8 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id) {
 int SoftmaxFp16CPUKernel::Run() {
   if (in_plane_size_ == 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisFp16Run, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              SoftmaxLastAxisFp16Run, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
     }

View File

@@ -40,14 +40,15 @@ void StackFp16CPUKernel::InitMallocFlags() {
 int StackFp16CPUKernel::MallocAssignBuffer() {
   buffers_.resize(in_tensors_.size(), nullptr);
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
-    buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_));
+    buffers_.at(i) = reinterpret_cast<char *>(
+      ConvertInputFp32toFp16(in_tensors_.at(i), static_cast<const lite::InnerContext *>(context_)));
     if (buffers_.at(i) == nullptr) {
       return RET_ERROR;
     }
   }

   out_buffer_ = nullptr;
-  out_buffer_ = MallocOutputFp16(out_tensors_.at(0), context_);
+  out_buffer_ = MallocOutputFp16(out_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
   if (out_buffer_ == nullptr) {
     return RET_ERROR;
   }
@@ -100,7 +101,8 @@ int StackFp16CPUKernel::Run() {
   }
   // run stack
   num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_);
-  ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_);
+  ret =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
     return RET_ERROR;

View File

@@ -79,7 +79,8 @@ int ActivationGradRunFp16(void *cdata, int task_id) {
 }

 int ActivationGradCPUKernelFp16::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRunFp16, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationGradRunFp16, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -73,7 +73,8 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id) {
 }

 int ArithmeticSelfGradFp16CPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfGradFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ArithmeticSelfGradFp16Run, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -101,7 +101,8 @@ int ActivationRun(void *cdata, int task_id) {
 }

 int ActivationCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ActivationRun,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
     return RET_ERROR;

View File

@@ -122,7 +122,8 @@ int AdderCPUKernel::Run() {
     return RET_ERROR;
   }

-  int error_code = ParallelLaunch(this->context_->thread_pool_, AdderImpl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdderImpl,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "adder error error_code[" << error_code << "]";
     FreeTmpBuffer();

View File

@@ -89,7 +89,8 @@ int AddNCPUKernel::Run() {
   in1_addr_ = input0_data;
   in2_addr_ = input1_data;
   out_addr_ = output_data;
-  auto ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
     return RET_ERROR;
@@ -97,7 +98,8 @@ int AddNCPUKernel::Run() {
   for (size_t i = 2; i < in_tensors_.size(); ++i) {
     in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
     in2_addr_ = output_data;
-    ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this,
+                         op_parameter_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
       return RET_ERROR;

View File

@@ -418,7 +418,8 @@ int ArithmeticCPUKernel::Run() {
     input1_ptr_ = in_tensors_[1]->data_c();
   }
   output_ptr_ = out_tensors_[0]->data_c();
-  return ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this,
+                        context_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>)

View File

@@ -113,7 +113,8 @@ int ArithmeticSelfRun(void *cdata, int task_id) {
 }

 int ArithmeticSelfCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }

View File

@@ -75,7 +75,8 @@ int BatchnormCPUKernel::InitConstTensor() {
 }

 int BatchnormCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }

View File

@@ -141,7 +141,8 @@ int CastCPUKernel::Run() {
   if (data_num_ == 0) {
     return RET_OK;
   }
-  return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastRun, this,
+                        op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>)

View File

@@ -69,7 +69,8 @@ int ConcatRun(void *cdata, int task_id) {
 }

 int ConcatCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatRun, this, op_parameter_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConcatRun,
+                                  this, op_parameter_->thread_num_);
   return error_code;
 }

View File

@@ -247,10 +247,12 @@ int Convolution1x1CPUKernel::Run() {
     }
     if (multi_thread_by_hw_) {
-      ParallelLaunch(this->context_->thread_pool_, Convolution1x1RunHw, this, thread_count_);
+      ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1RunHw, this,
+                     thread_count_);
     } else {
       PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
-      ParallelLaunch(this->context_->thread_pool_, Convolution1x1Run, this, thread_count_);
+      ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1Run, this,
+                     thread_count_);
     }
   }

View File

@ -138,16 +138,19 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
kernel::LiteKernel *kernel = nullptr; kernel::LiteKernel *kernel = nullptr;
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_); auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
kernel = new (std::nothrow) kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_,
kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_); static_cast<const lite::InnerContext *>(this->context_),
origin_weight_, origin_bias_);
} else { } else {
int out_unit; int out_unit;
if (CheckIfUseWinograd(&out_unit, conv_param)) { if (CheckIfUseWinograd(&out_unit, conv_param)) {
kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel( kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(
op_parameter_, in_tensors_, out_tensors_, context_, out_unit, origin_weight_, origin_bias_); op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->context_), out_unit,
origin_weight_, origin_bias_);
} else { } else {
kernel = new (std::nothrow) kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_,
kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_); static_cast<const lite::InnerContext *>(this->context_),
origin_weight_, origin_bias_);
} }
} }
@@ -214,7 +217,7 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor
 /* creator func */
 kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                             const InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
   MS_ASSERT(desc.data_type == kNumberTypeFloat32);
@@ -222,11 +225,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   kernel::LiteKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
-    kernel = new (std::nothrow) kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow)
+      kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, ctx);
+    kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx));
   } else {
-    kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, ctx);
+    kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx));
   }
   if (kernel == nullptr) {
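The creator functions below all change in lockstep with this one: the public entry point now takes a const lite::Context *, and the cast back to lite::InnerContext happens exactly once, where the concrete kernel object is constructed. A minimal sketch of that shape, using a placeholder FooCPUKernel rather than any class from this patch:

// Sketch only: FooCPUKernel / CpuFooFp32KernelCreator are placeholders, not part of this patch.
kernel::LiteKernel *CpuFooFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
                                            const lite::Context *ctx, const kernel::KernelKey &desc) {
  MS_ASSERT(op_parameter != nullptr);
  // The public signature exposes lite::Context; the kernel itself still needs the
  // runtime-internal InnerContext, so the downcast is confined to this one call site.
  auto *kernel = new (std::nothrow)
    kernel::FooCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new FooCPUKernel fail!";
    free(op_parameter);
    return nullptr;
  }
  return kernel;
}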
View File
@@ -126,7 +126,8 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDw3x3Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDw3x3Run, this,
+                            conv_param_->thread_num_);
   ctx_->allocator->Free(buffer_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
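This hunk is the first of many with the same shape: context_ is now stored as a const lite::Context *, so every call site that needs the thread pool casts it back to lite::InnerContext before reading thread_pool_. A minimal sketch of the recurring pattern, with MyCPUKernel and MyRun as placeholder names:

// Sketch of the recurring call-site change; MyCPUKernel and MyRun are placeholders.
int MyCPUKernel::Run() {
  // context_ is held as const lite::Context *, but only lite::InnerContext carries
  // thread_pool_, hence the explicit downcast before ParallelLaunch.
  auto *inner_ctx = static_cast<const lite::InnerContext *>(this->context_);
  auto ret = ParallelLaunch(inner_ctx->thread_pool_, MyRun, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "MyRun error: error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}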
View File
@@ -107,7 +107,8 @@ int ConvolutionDepthwiseCPUKernel::Run() {
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwRun, this,
+                            conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -194,7 +194,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
   ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w);
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwIndirectRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwIndirectRun,
+                            this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -163,7 +163,8 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
     packed_output_ = output_ptr;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWRun, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
   }

View File
@@ -152,7 +152,8 @@ int ConvolutionCPUKernel::Run() {
     PackWeight();
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionImpl, this,
+                       thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }
View File
@@ -219,7 +219,8 @@ int ConvolutionWinogradCPUKernel::Run() {
     InitWeightBias();
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionWinogradImpl,
+                       this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }

View File
@@ -151,7 +151,8 @@ int CropAndResizeCPUKernel::Run() {
     return ret;
   }
-  int error_code = ParallelLaunch(this->context_->thread_pool_, CropAndResizeImpl, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  CropAndResizeImpl, this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]";
     FreeTmpBuffer();

View File
@@ -62,7 +62,8 @@ int CropCPUKernel::Run() {
     return RET_OK;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, CropLaunch, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropLaunch, this,
+                            crop_para_->thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
     return RET_ERROR;

View File
@@ -168,7 +168,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     packed_output_ = output_addr;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwRun, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
   }

View File
@@ -222,7 +222,8 @@ int DeConvolutionCPUKernel::Run() {
     RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif
-    error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp32Run,
+                                this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
       FreeRunBuf();
@@ -236,7 +237,7 @@ int DeConvolutionCPUKernel::Run() {
 kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                               const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                               const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
@@ -245,12 +246,15 @@ kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *>
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
         (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) {
-      kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs,
+                                                                         static_cast<const lite::InnerContext *>(ctx));
     } else {
-      kernel = new (std::nothrow) kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow)
+        kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
     }
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs,
+                                                                        static_cast<const lite::InnerContext *>(ctx));
   } else {
     MS_LOG(ERROR) << "deconv do not support group deconv!";
     kernel = nullptr;
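Restating the group_ == 1 branch of CpuDeConvFp32KernelCreator above: a strided but undilated transpose convolution is routed to the Winograd kernel, everything else to the plain kernel; the depthwise and unsupported-group branches are unchanged apart from the cast. An equivalent sketch with the two tests named for readability (no behavioural change intended):

// Equivalent restatement of the group_ == 1 branch above; field names are those of ConvParameter.
const bool strided = conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1;
const bool dilated = conv_param->dilation_w_ != 1 || conv_param->dilation_h_ != 1;
if (strided && !dilated) {
  // Strided, undilated transpose convolution: use the Winograd-based deconvolution kernel.
  kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs,
                                                                     static_cast<const lite::InnerContext *>(ctx));
} else {
  kernel = new (std::nothrow)
    kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
}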
View File
@@ -411,10 +411,12 @@ int DeConvolutionWinogradCPUKernel::Run() {
     nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
     ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float));
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgFp32Run, this, deconv_param_->thread_num_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp32Run, this,
+                   deconv_param_->thread_num_);
     /*post bias activate and nhwc */
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp32Run, this, thread_num_hw_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp32Run, this,
+                   thread_num_hw_);
   }
   FreeRunBuf();

View File
@@ -55,7 +55,8 @@ int EluRun(void *cdata, int task_id) {
 }
 int EluCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EluRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -87,7 +87,8 @@ int EmbeddingLookupCPUKernel::Run() {
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EmbeddingLookupRun,
+                            this, op_parameter_->thread_num_);
   FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";

View File
@@ -73,7 +73,8 @@ int ExpCPUKernel::Run() {
   output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ExpRun, this, exp_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ExpRun, this,
+                            exp_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -91,7 +91,8 @@ int FillCPUKernel::Run() {
     MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, FillRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, FillRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
     return ret;

View File
@@ -93,7 +93,8 @@ int FusedBatchnormCPUKernel::Run() {
     trained_ = true;  // trained at least once
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }

View File
@@ -128,7 +128,8 @@ int GatherNdCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   InitOffset();
-  auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherNdRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]";
     return ret;

View File
@@ -92,7 +92,8 @@ int GatherCPUKernel::Run() {
     return ret;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, GatherRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRun, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
   }

View File
@@ -66,7 +66,8 @@ int InstanceNormCPUKernel::Run() {
   gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
   beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c());
   dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
     return ret;

View File
@@ -146,7 +146,8 @@ int L2NormCPUKernel::Run() {
   output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
   if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
     // all axis
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SquareSumRun, this,
+                              context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
@@ -156,13 +157,15 @@ int L2NormCPUKernel::Run() {
       sum += tmp_sum_[i];
     }
     sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
-    ret = ParallelLaunch(this->context_->thread_pool_, L2NormRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, L2NormRun, this,
+                         context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
     }
   } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              L2NormTrailingAxisRun, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
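The all-axis branch above is a split reduction: SquareSumRun lets each worker accumulate a partial sum of squares into tmp_sum_, the main thread folds the partials and clamps the total by epsilon, and L2NormRun then divides every element by the resulting norm. A serial equivalent of the two launches, where input_ptr_ and element_num are assumed names for the flattened input pointer and its length:

// Serial equivalent of the two-stage parallel launch above.
// input_ptr_ / element_num are assumed names for the flattened input and its length.
float sum = 0.0f;
for (int i = 0; i < element_num; ++i) {
  sum += input_ptr_[i] * input_ptr_[i];  // what SquareSumRun accumulates per worker
}
sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
for (int i = 0; i < element_num; ++i) {
  output_ptr_[i] = input_ptr_[i] / sqrt_sum_;  // what L2NormRun applies per worker
}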
View File
@@ -92,7 +92,8 @@ int LayerNormCPUKernel::Run() {
     mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
     var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LayerNormRun, this,
+                       op_parameter_->thread_num_);
   if (out_tensors_.size() != 3) {
     context_->allocator->Free(mean_data_);
     context_->allocator->Free(var_data_);

View File
@@ -74,7 +74,8 @@ int LocalResponseNormRun(void *cdata, int task_id) {
 }
 int LocalResponseNormCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  LocalResponseNormRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";
     return RET_ERROR;

View File
@@ -96,7 +96,8 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id) {
 int LogSoftmaxCPUKernel::Run() {
   int ret = RET_OK;
   if (in_plane_size_ == 1) {
-    ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LogSoftmaxLastAxisRun,
+                         this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
     }

View File
@@ -61,7 +61,8 @@ int LshProjectionCPUKernel::Run() {
   if (ret != RET_OK) {
     return ret;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LshProjectionRun, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
   }

View File
@@ -332,7 +332,8 @@ int MatmulFp32BaseCPUKernel::Run() {
     batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
     batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFloatRun, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFloatRun,
+                            this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
     return ret;

View File
@@ -181,7 +181,8 @@ int OneHotCPUKernel::GetParams() {
 }
 int OneHotCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunOneHot,
+                                  this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";
     return RET_ERROR;

View File
@@ -395,7 +395,8 @@ int PadCPUKernel::Run() {
         output_data[i] = pad_param_->constant_value_;
       }
     }
-    error_code = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, context_->thread_num_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this,
+                                context_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
       return RET_ERROR;
@@ -408,7 +409,8 @@ int PadCPUKernel::Run() {
       return error_code;
     }
-    error_code = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl,
+                                this, context_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
       return RET_ERROR;
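The two hunks in this file sit on the two sides of the padding-mode branch: constant padding pre-fills the output with constant_value_ and then launches PadImpl, while the reflect and symmetric modes launch MirrorPadImpl. Roughly, with the mode field and enum spellings assumed for illustration rather than taken from this patch:

// Illustrative only: the pad-mode field and enum names below are assumptions, not from this diff.
if (pad_param_->pad_mode_ == PaddingMode_Constant) {
  // the output buffer was already pre-filled with pad_param_->constant_value_ above
  error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this,
                              context_->thread_num_);
} else {  // reflect or symmetric padding
  error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl,
                              this, context_->thread_num_);
}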
View File
@@ -85,7 +85,8 @@ int PoolingImpl(void *cdata, int task_id) {
 }
 int PoolingCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PoolingImpl,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
     return RET_ERROR;

View File
@@ -41,7 +41,8 @@ int PowerImpl(void *cdata, int task_id) {
 }
 int PowerCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImpl, this,
+                            thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;
     return RET_ERROR;

View File
@@ -93,7 +93,8 @@ int PReluCPUKernel::Run() {
   auto negative_slope_tensor = in_tensors_.at(1);
   prelu_param_->slope_ = reinterpret_cast<float *>(negative_slope_tensor->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PReluRun, this, prelu_param_->op_parameter_.thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PReluRun, this,
+                            prelu_param_->op_parameter_.thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -117,7 +117,8 @@ int ReduceCPUKernel::Run() {
     outer_size_ = outer_sizes_.at(i);
     inner_size_ = inner_sizes_.at(i);
     axis_size_ = axis_sizes_.at(i);
-    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_);
+    auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReduceImpl,
+                                     this, context_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
       FreeTmpBuffer();

View File
@@ -205,7 +205,8 @@ int ResizeCPUKernel::RunImpl(int task_id) {
 }
 int ResizeCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ResizeImpl,
+                                  this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";
     FreeTmpBuffer();

View File
@@ -129,7 +129,8 @@ int ReverseCPUKernel::DoReverse(int task_id) {
 int ReverseCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReverseRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]";
     return ret;

View File
@@ -101,7 +101,8 @@ int ROIPoolingCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ROIPoolingRun, this,
+                            param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]";
     return ret;

View File
@@ -188,7 +188,8 @@ int ScaleCPUKernel::Run() {
   auto out_tensor = out_tensors_.front();
   output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -149,7 +149,8 @@ int ScatterNDRun(void *cdata, int task_id) {
 }
 int ScatterNDCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ScatterNDRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScatterNDRun, this,
+                            thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -96,7 +96,8 @@ int SoftmaxLastAxisRun(void *cdata, int task_id) {
 int SoftmaxCPUKernel::Run() {
   int ret = RET_OK;
   if (in_plane_size_ == 1) {
-    ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SoftmaxLastAxisRun,
+                         this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "SoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
     }

View File
@@ -102,7 +102,8 @@ int SpaceToBatchCPUKernel::Run() {
     }
   }
-  ParallelLaunch(this->context_->thread_pool_, SpaceToBatchFp32Run, this, op_parameter_->thread_num_);
+  ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToBatchFp32Run, this,
+                 op_parameter_->thread_num_);
   return RET_OK;
 }
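Unlike the other kernels touched in this patch, SpaceToBatchCPUKernel::Run discards the ParallelLaunch return value. A checked variant would look like the sketch below; the patch itself leaves the call unchecked:

// Sketch of a checked variant; not part of this patch.
auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToBatchFp32Run,
                          this, op_parameter_->thread_num_);
if (ret != RET_OK) {
  MS_LOG(ERROR) << "SpaceToBatchFp32Run error: error_code[" << ret << "]";
  return RET_ERROR;
}
return RET_OK;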
View File
@@ -93,7 +93,8 @@ int SpaceToDepthCPUKernel::Run() {
   input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
   output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
   if (in_tensors_.at(0)->format() == schema::Format::Format_NHWC) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SpaceToDepthRun, this, thread_h_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToDepthRun,
+                              this, thread_h_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]";
       return ret;

View File
@@ -175,7 +175,8 @@ int SparseToDenseCPUKernel::Run() {
   }
   output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
   count_unit_ = thread_count_ > 1 ? UP_DIV(index_num, thread_count_) : index_num;
-  ret = ParallelLaunch(this->context_->thread_pool_, SparseToDenseRun, this, s2d_param->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SparseToDenseRun, this,
+                       s2d_param->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -159,7 +159,8 @@ int TransposeCPUKernel::Run() {
   thread_count_ = op_parameter_->thread_num_;
   GetNHNCTransposeFunc(in_tensor, out_tensor, param_);
   if (NHNCTransposeFunc_ != nullptr) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl,
+                              this, thread_count_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "NHNCTransposeFunc_ is error!";
     }
@@ -187,7 +188,8 @@ int TransposeCPUKernel::Run() {
   }
   int ret;
   if (dims_ > MAX_TRANSPOSE_DIM_SIZE) {
-    ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl, this,
+                         thread_count_);
   } else {
     ret = DoTransposeFp32(in_data_, out_data_, out_shape_, param_);
   }

View File
@@ -133,7 +133,8 @@ int WhereCPUKernel::RunWithTripleInputs() {
     MS_LOG(ERROR) << "Error, inputs' length are zero !!!";
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, WhereRun, this, where_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, WhereRun, this,
+                            where_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]";
     return RET_ERROR;

View File
@@ -98,7 +98,8 @@ int ActivationGradRun(void *cdata, int task_id) {
 }
 int ActivationGradCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationGradRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;

View File
@@ -102,7 +102,8 @@ int AdamRun(void *cdata, int task_id) {
 }
 int AdamCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, AdamRun, this, thread_count_);
+  int error_code =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdamRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Adam function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -145,9 +146,10 @@ int AdamCPUKernel::OptimizerStep() {
 kernel::LiteKernel *CpuAdamFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(desc.type == schema::PrimitiveType_Adam);
-  auto *kernel = new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel =
+    new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new AdamCPUKernel fail!";
     free(opParameter);

View File
@@ -82,7 +82,8 @@ int ApplyMomentumRun(void *cdata, int task_id) {
 }
 int ApplyMomentumCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ApplyMomentumRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ApplyMomentumRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Apply Momentum function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -119,10 +120,11 @@ int ApplyMomentumCPUKernel::OptimizerStep() {
 kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                       const std::vector<lite::Tensor *> &outputs,
-                                                      OpParameter *opParameter, const lite::InnerContext *ctx,
+                                                      OpParameter *opParameter, const lite::Context *ctx,
                                                       const kernel::KernelKey &desc) {
   MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum);
-  auto *kernel = new (std::nothrow) ApplyMomentumCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow)
+    ApplyMomentumCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ApplyMomentumCPUKernel fail!";
     free(opParameter);
Some files were not shown because too many files have changed in this diff.